From 93ca42cd27188c1f6725210679d348db647ff624 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Fri, 8 Sep 2023 09:30:00 +0200 Subject: [PATCH] WIP --- .../workflows/api-binary-compatibility.yml | 61 -------- .github/workflows/delta-conversion-ci.yml | 114 -------------- .github/workflows/flink-ci.yml | 88 ----------- .github/workflows/hive-ci.yml | 109 ------------- .github/workflows/java-ci.yml | 100 ------------ .github/workflows/jmh-benchmarks.yml | 103 ------------ .github/workflows/labeler.yml | 34 ---- .github/workflows/license_check.yml | 29 ---- .github/workflows/open-api.yml | 60 ------- .github/workflows/publish-snapshot.yml | 44 ------ .github/workflows/python-ci-docs.yml | 56 ------- .github/workflows/python-ci.yml | 63 -------- .github/workflows/python-integration.yml | 53 ------- .github/workflows/python-release.yml | 11 +- .../workflows/recurring-jmh-benchmarks.yml | 73 --------- .github/workflows/spark-ci.yml | 146 ------------------ .github/workflows/stale.yml | 49 ------ python/build-module.py | 37 ++--- python/pyiceberg/avro/decoder_fast.pyx | 2 +- python/pyproject.toml | 2 +- python/tests/conftest.py | 22 ++- 21 files changed, 40 insertions(+), 1216 deletions(-) delete mode 100644 .github/workflows/api-binary-compatibility.yml delete mode 100644 .github/workflows/delta-conversion-ci.yml delete mode 100644 .github/workflows/flink-ci.yml delete mode 100644 .github/workflows/hive-ci.yml delete mode 100644 .github/workflows/java-ci.yml delete mode 100644 .github/workflows/jmh-benchmarks.yml delete mode 100644 .github/workflows/labeler.yml delete mode 100644 .github/workflows/license_check.yml delete mode 100644 .github/workflows/open-api.yml delete mode 100644 .github/workflows/publish-snapshot.yml delete mode 100644 .github/workflows/python-ci-docs.yml delete mode 100644 .github/workflows/python-ci.yml delete mode 100644 .github/workflows/python-integration.yml delete mode 100644 .github/workflows/recurring-jmh-benchmarks.yml delete mode 100644 .github/workflows/spark-ci.yml delete mode 100644 .github/workflows/stale.yml diff --git a/.github/workflows/api-binary-compatibility.yml b/.github/workflows/api-binary-compatibility.yml deleted file mode 100644 index d14d38d37d90..000000000000 --- a/.github/workflows/api-binary-compatibility.yml +++ /dev/null @@ -1,61 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "API Binary Compatibility Checks" -on: - push: - branches: - - 'master' - - '0.**' - tags: - - 'apache-iceberg-**' - pull_request: - paths: - - 'api/**' - - '.palantir/revapi.yml' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -jobs: - revapi: - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v3 - with: - # fetch-depth of zero ensures that the tags are pulled in and we're not in a detached HEAD state - # revapi depends on the tags, specifically the tag from git describe, to find the relevant override - # in the .palantir/revapi.yml file - # - # See https://github.com/actions/checkout/issues/124 - fetch-depth: 0 - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: 11 - - run: | - echo "Using the old version tag, as per git describe, of $(git describe)"; - - run: ./gradlew revapi --rerun-tasks - - uses: actions/upload-artifact@v3 - if: failure() - with: - name: test logs - path: | - **/build/testlogs diff --git a/.github/workflows/delta-conversion-ci.yml b/.github/workflows/delta-conversion-ci.yml deleted file mode 100644 index ec72fee6a2b9..000000000000 --- a/.github/workflows/delta-conversion-ci.yml +++ /dev/null @@ -1,114 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Delta Conversion CI" -on: - push: - branches: - - 'master' - - '0.**' - tags: - - 'apache-iceberg-**' - pull_request: - paths-ignore: - - '.github/ISSUE_TEMPLATE/iceberg_bug_report.yml' - - '.github/workflows/python-ci.yml' - - '.github/workflows/flink-ci.yml' - - '.github/workflows/hive-ci.yml' - - '.gitignore' - - '.asf.yml' - - 'dev/**' - - 'mr/**' - - 'hive3/**' - - 'hive3-orc-bundle/**' - - 'hive-runtime/**' - - 'flink/**' - - 'pig/**' - - 'python/**' - - 'docs/**' - - 'open-api/**' - - 'format/**' - - '.gitattributes' - - 'README.md' - - 'CONTRIBUTING.md' - - 'LICENSE' - - 'NOTICE' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -jobs: - delta-conversion-scala-2-12-tests: - runs-on: ubuntu-22.04 - strategy: - matrix: - jvm: [8, 11, 17] - env: - SPARK_LOCAL_IP: localhost - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: ${{ matrix.jvm }} - - uses: actions/cache@v3 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: ${{ runner.os }}-gradle- - - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - run: ./gradlew -DsparkVersions=3.3 -DscalaVersion=2.12 -DhiveVersions= -DflinkVersions= :iceberg-delta-lake:check -Pquick=true -x javadoc - - uses: actions/upload-artifact@v3 - if: failure() - with: - name: test logs - path: | - **/build/testlogs - - delta-conversion-scala-2-13-tests: - runs-on: ubuntu-22.04 - strategy: - matrix: - jvm: [8, 11, 17] - env: - SPARK_LOCAL_IP: localhost - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: ${{ matrix.jvm }} - - uses: actions/cache@v3 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: ${{ runner.os }}-gradle- - - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - run: ./gradlew -DsparkVersions=3.3 -DscalaVersion=2.13 -DhiveVersions= -DflinkVersions= :iceberg-delta-lake:check -Pquick=true -x javadoc - - uses: actions/upload-artifact@v3 - if: failure() - with: - name: test logs - path: | - **/build/testlogs diff --git a/.github/workflows/flink-ci.yml b/.github/workflows/flink-ci.yml deleted file mode 100644 index 66c3e198023e..000000000000 --- a/.github/workflows/flink-ci.yml +++ /dev/null @@ -1,88 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Flink CI" -on: - push: - branches: - - 'master' - - '0.**' - tags: - - 'apache-iceberg-**' - pull_request: - paths-ignore: - - '.github/ISSUE_TEMPLATE/iceberg_bug_report.yml' - - '.github/workflows/python-ci.yml' - - '.github/workflows/spark-ci.yml' - - '.github/workflows/hive-ci.yml' - - '.gitignore' - - '.asf.yml' - - 'dev/**' - - 'mr/**' - - 'hive3/**' - - 'hive3-orc-bundle/**' - - 'hive-runtime/**' - - 'spark/**' - - 'pig/**' - - 'python/**' - - 'docs/**' - - 'open-api/**' - - 'format/**' - - '.gitattributes' - - 'README.md' - - 'CONTRIBUTING.md' - - 'LICENSE' - - 'NOTICE' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -jobs: - - # Test all flink versions with scala 2.12 for general validation. - flink-scala-2-12-tests: - runs-on: ubuntu-22.04 - strategy: - matrix: - jvm: [8, 11] - flink: ['1.15', '1.16', '1.17'] - env: - SPARK_LOCAL_IP: localhost - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: ${{ matrix.jvm }} - - uses: actions/cache@v3 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: ${{ runner.os }}-gradle- - - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - run: ./gradlew -DsparkVersions= -DhiveVersions= -DflinkVersions=${{ matrix.flink }} :iceberg-flink:iceberg-flink-${{ matrix.flink }}:check :iceberg-flink:iceberg-flink-runtime-${{ matrix.flink }}:check -Pquick=true -x javadoc - - uses: actions/upload-artifact@v3 - if: failure() - with: - name: test logs - path: | - **/build/testlogs diff --git a/.github/workflows/hive-ci.yml b/.github/workflows/hive-ci.yml deleted file mode 100644 index 11e4e3e9303d..000000000000 --- a/.github/workflows/hive-ci.yml +++ /dev/null @@ -1,109 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Hive CI" -on: - push: - branches: - - 'master' - - '0.**' - tags: - - 'apache-iceberg-**' - pull_request: - paths-ignore: - - '.github/ISSUE_TEMPLATE/iceberg_bug_report.yml' - - '.github/workflows/python-ci.yml' - - '.github/workflows/spark-ci.yml' - - '.github/workflows/flink-ci.yml' - - '.gitignore' - - '.asf.yml' - - 'dev/**' - - 'arrow/**' - - 'spark/**' - - 'flink/**' - - 'pig/**' - - 'python/**' - - 'docs/**' - - 'open-api/**' - - 'format/**' - - '.gitattributes' - - 'README.md' - - 'CONTRIBUTING.md' - - 'LICENSE' - - 'NOTICE' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -jobs: - hive2-tests: - runs-on: ubuntu-22.04 - strategy: - matrix: - jvm: [8, 11, 17] - env: - SPARK_LOCAL_IP: localhost - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: ${{ matrix.jvm }} - - uses: actions/cache@v3 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: ${{ runner.os }}-gradle- - - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - run: ./gradlew -DsparkVersions= -DhiveVersions=2 -DflinkVersions= -Pquick=true :iceberg-mr:check :iceberg-hive-runtime:check -x javadoc - - uses: actions/upload-artifact@v3 - if: failure() - with: - name: test logs - path: | - **/build/testlogs - - hive3-tests: - runs-on: ubuntu-22.04 - env: - SPARK_LOCAL_IP: localhost - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: 8 - - uses: actions/cache@v3 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: ${{ runner.os }}-gradle- - - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - run: ./gradlew -DsparkVersions= -DhiveVersions=3 -DflinkVersions= -Pquick=true :iceberg-hive3-orc-bundle:check :iceberg-hive3:check :iceberg-hive-runtime:check -x javadoc - - uses: actions/upload-artifact@v3 - if: failure() - with: - name: test logs - path: | - **/build/testlogs diff --git a/.github/workflows/java-ci.yml b/.github/workflows/java-ci.yml deleted file mode 100644 index 19b20bcb07cf..000000000000 --- a/.github/workflows/java-ci.yml +++ /dev/null @@ -1,100 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Java CI" -on: - push: - branches: - - 'master' - - '0.**' - tags: - - 'apache-iceberg-**' - pull_request: - paths-ignore: - - '.github/ISSUE_TEMPLATE/iceberg_bug_report.yml' - - '.github/workflows/python-ci.yml' - - '.github/workflows/spark-ci.yml' - - '.github/workflows/flink-ci.yml' - - '.github/workflows/hive-ci.yml' - - '.gitignore' - - '.asf.yml' - - 'dev/**' - - 'python/**' - - 'docs/**' - - 'open-api/**' - - 'format/**' - - '.gitattributes' - - 'README.md' - - 'CONTRIBUTING.md' - - 'LICENSE' - - 'NOTICE' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -jobs: - core-tests: - runs-on: ubuntu-22.04 - strategy: - matrix: - jvm: [8, 11, 17] - env: - SPARK_LOCAL_IP: localhost - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: ${{ matrix.jvm }} - - uses: actions/cache@v3 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: ${{ runner.os }}-gradle- - - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - run: ./gradlew check -DsparkVersions= -DhiveVersions= -DflinkVersions= -Pquick=true -x javadoc - - uses: actions/upload-artifact@v3 - if: failure() - with: - name: test logs - path: | - **/build/testlogs - - build-checks: - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: 8 - - run: ./gradlew -DallVersions build -x test -x javadoc -x integrationTest - - build-javadoc: - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: 8 - - run: ./gradlew -Pquick=true javadoc diff --git a/.github/workflows/jmh-benchmarks.yml b/.github/workflows/jmh-benchmarks.yml deleted file mode 100644 index 2e95baaeb917..000000000000 --- a/.github/workflows/jmh-benchmarks.yml +++ /dev/null @@ -1,103 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "JMH Benchmarks" -on: - workflow_dispatch: - inputs: - repo: - description: 'Repo name with owner, such as apache/iceberg' - required: true - ref: - description: 'The branch name' - required: true - spark_version: - description: 'The spark project version to use, such as iceberg-spark-3.4' - default: 'iceberg-spark-3.4' - required: true - benchmarks: - description: 'A list of comma-separated double-quoted Benchmark names, such as "IcebergSourceFlatParquetDataReadBenchmark", "IcebergSourceFlatParquetDataFilterBenchmark"' - required: true - -jobs: - matrix: - runs-on: ubuntu-22.04 - outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - foundlabel: ${{ steps.set-matrix.outputs.foundlabel }} - steps: - - uses: actions/checkout@v3 - with: - repository: ${{ github.event.inputs.repo }} - ref: ${{ github.event.inputs.ref }} - - id: set-matrix - run: | - matrix=$(echo '[${{ github.event.inputs.benchmarks }}]' | jq '.[] | select(endswith("Benchmark")) | .') - matrix=$(echo $matrix | sed 's/ /,/g' | sed 's/"/\"/g') - echo "::set-output name=matrix::[$matrix]" - echo "::set-output name=foundlabel::$(echo "[$matrix]" | jq 'if . | length > 0 then true else false end')" - - show-matrix: - needs: matrix - runs-on: ubuntu-22.04 - steps: - - run: | - echo "Repo: ${{ github.event.inputs.repo }}" - echo "Ref: ${{ github.event.inputs.ref }}" - echo "Benchmarks: ${{ needs.matrix.outputs.matrix }}" - echo "Spark Project Version: ${{ needs.matrix.outputs.spark_version }}" - echo "Found Benchmarks? ${{ needs.matrix.outputs.foundlabel }}" - - run-benchmark: - if: ${{ needs.matrix.outputs.foundlabel == 'true' }} - needs: matrix - runs-on: ubuntu-22.04 - strategy: - fail-fast: false - matrix: - benchmark: ${{ fromJson(needs.matrix.outputs.matrix) }} - env: - SPARK_LOCAL_IP: localhost - steps: - - uses: actions/checkout@v3 - with: - repository: ${{ github.event.inputs.repo }} - ref: ${{ github.event.inputs.ref }} - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: 11 - - uses: actions/cache@v3 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: ${{ runner.os }}-gradle- - - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - - name: Run Benchmark - run: ./gradlew :iceberg-spark:${{ github.event.inputs.spark_version }}:jmh -PjmhIncludeRegex=${{ matrix.benchmark }} -PjmhOutputPath=benchmark/${{ matrix.benchmark }}.txt - - - uses: actions/upload-artifact@v3 - if: ${{ always() }} - with: - name: benchmark-results - path: | - **/benchmark/*.txt diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml deleted file mode 100644 index c4cad7aef2dc..000000000000 --- a/.github/workflows/labeler.yml +++ /dev/null @@ -1,34 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Pull Request Labeler" -on: pull_request_target - -permissions: - contents: read - pull-requests: write - -jobs: - triage: - runs-on: ubuntu-22.04 - steps: - - uses: actions/labeler@v4 - with: - repo-token: "${{ secrets.GITHUB_TOKEN }}" - sync-labels: true diff --git a/.github/workflows/license_check.yml b/.github/workflows/license_check.yml deleted file mode 100644 index b0d99755d375..000000000000 --- a/.github/workflows/license_check.yml +++ /dev/null @@ -1,29 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Run License Check" -on: pull_request - -jobs: - rat: - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v3 - - run: | - dev/check-license diff --git a/.github/workflows/open-api.yml b/.github/workflows/open-api.yml deleted file mode 100644 index 0929198eb637..000000000000 --- a/.github/workflows/open-api.yml +++ /dev/null @@ -1,60 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Open-API" -on: - push: - branches: - - 'master' - - '0.**' - tags: - - 'apache-iceberg-**' - pull_request: - paths: - - '.github/workflows/open-api.yml' - - 'open-api/**' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -jobs: - openapi-spec-validator: - runs-on: ubuntu-22.04 - - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Install - working-directory: ./open-api - run: make install - - name: Validate REST catalog spec - working-directory: ./open-api - run: make lint - - name: Generate REST catalog spec Python code - working-directory: ./open-api - run: make generate - - name: Check if code is up to date - working-directory: ./open-api - run: git diff --exit-code - - name: Validate S3 REST Signer spec - working-directory: ./aws/src/main/resources - run: openapi-spec-validator s3-signer-open-api.yaml diff --git a/.github/workflows/publish-snapshot.yml b/.github/workflows/publish-snapshot.yml deleted file mode 100644 index eb8b79f1a8f6..000000000000 --- a/.github/workflows/publish-snapshot.yml +++ /dev/null @@ -1,44 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Publish Snapshot to Maven" -on: - schedule: - # * is a special character in YAML so you have to quote this string - # this schedules a workflow to run at specific UTC times using POSIX cron syntax -> https://crontab.guru/ - # we're publishing a new snapshot every night at 00:00 UTC - - cron: '0 0 * * *' - -jobs: - publish-snapshot: - if: github.repository_owner == 'apache' - runs-on: ubuntu-22.04 - steps: - - uses: actions/checkout@v3 - with: - # we need to fetch all tags so that getProjectVersion() in build.gradle correctly determines the next SNAPSHOT version from the newest tag - fetch-depth: 0 - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: 8 - - run: | - ./gradlew printVersion - ./gradlew -DallVersions publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }} - ./gradlew -DflinkVersions= -DsparkVersions=3.2,3.3,3.4 -DscalaVersion=2.13 -DhiveVersions= publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }} diff --git a/.github/workflows/python-ci-docs.yml b/.github/workflows/python-ci-docs.yml deleted file mode 100644 index 4b732df21364..000000000000 --- a/.github/workflows/python-ci-docs.yml +++ /dev/null @@ -1,56 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Python Docs" -on: - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -jobs: - docs: - runs-on: ubuntu-22.04 - - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python }} - - name: Install - working-directory: ./python/mkdocs - run: pip install -r requirements.txt - - name: Build - working-directory: ./python/mkdocs - run: mkdocs build --strict - - name: Copy - working-directory: ./python/mkdocs - run: mv ./site /tmp/site - - name: Push changes to gh-pages branch - run: | - git checkout --orphan gh-pages-tmp - git rm --quiet -rf . - cp -r /tmp/site/* . - git config --global user.name 'GitHub Actions' - git config --global user.email 'actions@github.com' - echo "py.iceberg.apache.org" > CNAME - git add --all - git commit -m 'Publish Python docs' - git push -f origin gh-pages-tmp:gh-pages || true diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml deleted file mode 100644 index e31d0f90d364..000000000000 --- a/.github/workflows/python-ci.yml +++ /dev/null @@ -1,63 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Python CI" -on: - push: - branches: - - 'master' - - '0.**' - tags: - - 'apache-iceberg-**' - pull_request: - paths: - - '.github/workflows/python-ci.yml' - - 'python/**' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -jobs: - lint-and-test: - runs-on: ubuntu-22.04 - strategy: - matrix: - python: ['3.8', '3.9', '3.10', '3.11'] - - steps: - - uses: actions/checkout@v3 - - name: Install poetry - working-directory: ./python - run: make install-poetry - - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python }} - cache: poetry - cache-dependency-path: | - ./python/poetry.lock - - name: Install - working-directory: ./python - run: make install-dependencies - - name: Linters - working-directory: ./python - run: make lint - - name: Tests - working-directory: ./python - run: make test-coverage diff --git a/.github/workflows/python-integration.yml b/.github/workflows/python-integration.yml deleted file mode 100644 index 5709b1f061b4..000000000000 --- a/.github/workflows/python-integration.yml +++ /dev/null @@ -1,53 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Python Integration" -on: - push: - branches: - - 'master' - - '0.**' - tags: - - 'apache-iceberg-**' - pull_request: - paths: - - '.github/workflows/python-integration.yml' - - 'python/**' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -jobs: - integration-test: - runs-on: ubuntu-20.04 - - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - name: Install - run: make install - working-directory: ./python - - name: Run integration tests - run: make test-integration - working-directory: ./python - - name: Show debug logs - if: ${{ failure() }} - run: docker-compose -f python/dev/docker-compose.yml logs diff --git a/.github/workflows/python-release.yml b/.github/workflows/python-release.yml index e310acdcb11e..4730fdf805ee 100644 --- a/.github/workflows/python-release.yml +++ b/.github/workflows/python-release.yml @@ -34,7 +34,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ ubuntu-20.04, windows-2019, macos-11 ] + os: [ubuntu-22.04, windows-2022] #, macos-11 steps: - uses: actions/checkout@v3 @@ -46,8 +46,7 @@ jobs: python-version: '3.8' - name: Install poetry - run: pip3 install poetry - working-directory: ./python + run: pip install poetry - name: Set version run: python3 -m poetry version "${{ inputs.version }}" @@ -57,7 +56,7 @@ jobs: # Publish the source distribution with the version that's in # the repository, otherwise the tests will fail - name: Compile source distribution - run: python3 -m poetry build --format=sdist + run: python -m poetry build --format=sdist if: "${{ matrix.os == 'ubuntu-20.04' }}" working-directory: ./python @@ -69,6 +68,10 @@ jobs: config-file: "python/pyproject.toml" env: CIBW_PROJECT_REQUIRES_PYTHON: ">=3.8,<3.12" + CIBW_ARCHS_WINDOWS: "auto64" + CIBW_TEST_REQUIRES: "pytest moto" + CIBW_TEST_EXTRAS: "s3fs,glue" + CIBW_TEST_COMMAND: "find {project}/python && pytest -Werror {project}/python/tests/avro/test_decoder.py" - name: Add source distribution if: "${{ matrix.os == 'ubuntu-20.04' }}" diff --git a/.github/workflows/recurring-jmh-benchmarks.yml b/.github/workflows/recurring-jmh-benchmarks.yml deleted file mode 100644 index b864dc11202d..000000000000 --- a/.github/workflows/recurring-jmh-benchmarks.yml +++ /dev/null @@ -1,73 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Recurring JMH Benchmarks" -on: - schedule: - # * is a special character in YAML so you have to quote this string - # this schedules a workflow to run at specific UTC times using POSIX cron syntax -> https://crontab.guru/ - # we're running benchmarks every Sunday at 00:00 UTC - - cron: '0 0 * * 0' - -jobs: - run-benchmark: - if: github.repository_owner == 'apache' - runs-on: ubuntu-22.04 - strategy: - fail-fast: false - matrix: - # TODO: "IcebergSortCompactionBenchmark" seems to run indefinitely - benchmark: ["SparkParquetReadersFlatDataBenchmark", "SparkParquetReadersNestedDataBenchmark", - "SparkParquetWritersFlatDataBenchmark", "SparkParquetWritersNestedDataBenchmark", - "IcebergSourceFlatParquetDataFilterBenchmark", - "IcebergSourceFlatParquetDataReadBenchmark", "IcebergSourceFlatParquetDataWriteBenchmark", - "IcebergSourceNestedListParquetDataWriteBenchmark", "IcebergSourceNestedParquetDataFilterBenchmark", - "IcebergSourceNestedParquetDataReadBenchmark", "IcebergSourceNestedParquetDataWriteBenchmark", - "IcebergSourceParquetEqDeleteBenchmark", "IcebergSourceParquetMultiDeleteFileBenchmark", - "IcebergSourceParquetPosDeleteBenchmark", "IcebergSourceParquetWithUnrelatedDeleteBenchmark"] - spark_version: ['iceberg-spark-3.4'] - env: - SPARK_LOCAL_IP: localhost - steps: - - uses: actions/checkout@v3 - with: - repository: ${{ github.event.inputs.repo }} - ref: ${{ github.event.inputs.ref }} - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: 11 - - uses: actions/cache@v3 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: ${{ runner.os }}-gradle- - - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - - name: Run Benchmark - run: ./gradlew :iceberg-spark:${{ matrix.spark_version }}:jmh -PjmhIncludeRegex=${{ matrix.benchmark }} -PjmhOutputPath=benchmark/${{ matrix.benchmark }}.txt -PjmhJsonOutputPath=benchmark/${{ matrix.benchmark }}.json - - - uses: actions/upload-artifact@v3 - if: ${{ always() }} - with: - name: benchmark-results - path: | - **/benchmark/* diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml deleted file mode 100644 index b360e0f95eb5..000000000000 --- a/.github/workflows/spark-ci.yml +++ /dev/null @@ -1,146 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Spark CI" -on: - push: - branches: - - 'master' - - '0.**' - tags: - - 'apache-iceberg-**' - pull_request: - paths-ignore: - - '.github/ISSUE_TEMPLATE/iceberg_bug_report.yml' - - '.github/workflows/python-ci.yml' - - '.github/workflows/flink-ci.yml' - - '.github/workflows/hive-ci.yml' - - '.gitignore' - - '.asf.yml' - - 'dev/**' - - 'mr/**' - - 'hive3/**' - - 'hive3-orc-bundle/**' - - 'hive-runtime/**' - - 'flink/**' - - 'pig/**' - - 'python/**' - - 'docs/**' - - 'open-api/**' - - 'format/**' - - '.gitattributes' - - 'README.md' - - 'CONTRIBUTING.md' - - 'LICENSE' - - 'NOTICE' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -jobs: - spark-3x-scala-2-12-tests: - runs-on: ubuntu-22.04 - strategy: - matrix: - jvm: [8, 11] - spark: ['3.1', '3.2', '3.3', '3.4'] - env: - SPARK_LOCAL_IP: localhost - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: ${{ matrix.jvm }} - - uses: actions/cache@v3 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: ${{ runner.os }}-gradle- - - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - run: ./gradlew -DsparkVersions=${{ matrix.spark }} -DscalaVersion=2.12 -DhiveVersions= -DflinkVersions= :iceberg-spark:iceberg-spark-${{ matrix.spark }}_2.12:check :iceberg-spark:iceberg-spark-extensions-${{ matrix.spark }}_2.12:check :iceberg-spark:iceberg-spark-runtime-${{ matrix.spark }}_2.12:check -Pquick=true -x javadoc - - uses: actions/upload-artifact@v3 - if: failure() - with: - name: test logs - path: | - **/build/testlogs - - spark-3x-scala-2-13-tests: - runs-on: ubuntu-22.04 - strategy: - matrix: - jvm: [8, 11] - spark: ['3.2','3.3','3.4'] - env: - SPARK_LOCAL_IP: localhost - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: ${{ matrix.jvm }} - - uses: actions/cache@v3 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: ${{ runner.os }}-gradle- - - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - run: ./gradlew -DsparkVersions=${{ matrix.spark }} -DscalaVersion=2.13 -DhiveVersions= -DflinkVersions= :iceberg-spark:iceberg-spark-${{ matrix.spark }}_2.13:check :iceberg-spark:iceberg-spark-extensions-${{ matrix.spark }}_2.13:check :iceberg-spark:iceberg-spark-runtime-${{ matrix.spark }}_2.13:check -Pquick=true -x javadoc - - uses: actions/upload-artifact@v3 - if: failure() - with: - name: test logs - path: | - **/build/testlogs - - spark-3x-java-17-tests: - runs-on: ubuntu-22.04 - strategy: - matrix: - spark: ['3.3','3.4'] - scala-version: ['2.12', '2.13'] - env: - SPARK_LOCAL_IP: localhost - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-java@v3 - with: - distribution: zulu - java-version: 17 - - uses: actions/cache@v3 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: ${{ runner.os }}-gradle- - - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - run: ./gradlew -DsparkVersions=${{ matrix.spark }} -DscalaVersion=${{ matrix.scala-version }} -DhiveVersions= -DflinkVersions= :iceberg-spark:iceberg-spark-${{ matrix.spark }}_${{ matrix.scala-version }}:check :iceberg-spark:iceberg-spark-extensions-${{ matrix.spark }}_${{ matrix.scala-version }}:check :iceberg-spark:iceberg-spark-runtime-${{ matrix.spark }}_${{ matrix.scala-version }}:check -Pquick=true -x javadoc - - uses: actions/upload-artifact@v3 - if: failure() - with: - name: test logs - path: | - **/build/testlogs \ No newline at end of file diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml deleted file mode 100644 index 566ae2441ea0..000000000000 --- a/.github/workflows/stale.yml +++ /dev/null @@ -1,49 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -name: "Close Stale Issues" -on: - schedule: - - cron: '0 0 * * *' - -permissions: - # All other permissions are set to none - issues: write - -jobs: - stale: - if: github.repository_owner == 'apache' - runs-on: ubuntu-22.04 - steps: - - uses: actions/stale@v8.0.0 - with: - stale-issue-label: 'stale' - exempt-issue-labels: 'not-stale' - days-before-issue-stale: 180 - days-before-issue-close: 14 - # Only close stale issues, leave PRs alone - days-before-pr-stale: -1 - stale-issue-message: > - This issue has been automatically marked as stale because it has been open for 180 days - with no activity. It will be closed in next 14 days if no further activity occurs. To - permanently prevent this issue from being considered stale, add the label 'not-stale', - but commenting on the issue is preferred when possible. - close-issue-message: > - This issue has been closed because it has not received any activity in the last 14 days - since being marked as 'stale' diff --git a/python/build-module.py b/python/build-module.py index c6794ec62410..118329daf0a3 100644 --- a/python/build-module.py +++ b/python/build-module.py @@ -19,14 +19,13 @@ import shutil from pathlib import Path -# Uncommend if your library can still function if extensions fail to compile. -allowed_to_fail = False -# allowed_to_fail = os.environ.get("CIBUILDWHEEL", "0") != "1" +# When we build the wheels, it needs to succeed +allowed_to_fail = "CIBUILDWHEEL" not in os.environ def build_cython_extensions() -> None: - import Cython.Compiler.Options # pyright: ignore [reportMissingImports] - from Cython.Build import build_ext, cythonize # pyright: ignore [reportMissingImports] + import Cython.Compiler.Options + from Cython.Build import build_ext, cythonize from setuptools import Extension from setuptools.dist import Distribution @@ -40,28 +39,20 @@ def build_cython_extensions() -> None: extra_compile_args = [ "-O3", ] - # Relative to project root directory - include_dirs = { - "pyiceberg/", - } - extensions = [ - Extension( + package_path = "pyiceberg" + + extension = Extension( # Your .pyx file will be available to cpython at this location. - "pyiceberg.avro.decoder_fast", - [ - "pyiceberg/avro/decoder_fast.pyx", + name="pyiceberg.avro.decoder_fast", + sources=[ + os.path.join(package_path, "avro", "decoder_fast.pyx"), ], - include_dirs=list(include_dirs), extra_compile_args=extra_compile_args, language="c", - ), - ] - - for extension in extensions: - include_dirs.update(extension.include_dirs) + ) - ext_modules = cythonize(extensions, include_path=list(include_dirs), language_level=3, annotate=True) + ext_modules = cythonize([extension], include_path=list(package_path)) dist = Distribution({"ext_modules": ext_modules}) cmd = build_ext(dist) cmd.ensure_finalized() @@ -69,9 +60,9 @@ def build_cython_extensions() -> None: cmd.run() for output in cmd.get_outputs(): - output = Path(output) - relative_extension = output.relative_to(cmd.build_lib) + relative_extension = os.path.relpath(output, cmd.build_lib) shutil.copyfile(output, relative_extension) + print(f"OUTPUTTTTT {output}") try: diff --git a/python/pyiceberg/avro/decoder_fast.pyx b/python/pyiceberg/avro/decoder_fast.pyx index cff7752c7730..067534b92cf9 100644 --- a/python/pyiceberg/avro/decoder_fast.pyx +++ b/python/pyiceberg/avro/decoder_fast.pyx @@ -79,7 +79,7 @@ cdef class CythonBinaryDecoder: A boolean is written as a single byte whose value is either 0 (false) or 1 (true). """ - self._current += 1; + self._current += 1 return self._current[-1] != 0 cpdef inline long read_int(self): diff --git a/python/pyproject.toml b/python/pyproject.toml index 2a4ee643881a..bace8f4f675c 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -41,7 +41,7 @@ packages = [ ] include = [ { path = "dev", format = "sdist" }, - { path = "pyiceberg/**/*.so", format = "wheel" } + { path = "pyiceberg/**/*.so", format = "wheel" }, ] [tool.poetry.dependencies] diff --git a/python/tests/conftest.py b/python/tests/conftest.py index e01f982e52cc..ed7f1caa2184 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -32,6 +32,7 @@ from random import choice from tempfile import TemporaryDirectory from typing import ( + TYPE_CHECKING, Any, Callable, Dict, @@ -50,10 +51,8 @@ import boto3 import botocore.awsrequest import botocore.model -import pyarrow as pa import pytest from moto import mock_dynamodb, mock_glue, mock_s3 -from pyarrow import parquet as pq from pyiceberg import schema from pyiceberg.catalog import Catalog @@ -69,7 +68,6 @@ load_file_io, ) from pyiceberg.io.fsspec import FsspecFileIO -from pyiceberg.io.pyarrow import PyArrowFile, PyArrowFileIO from pyiceberg.manifest import DataFile, FileFormat from pyiceberg.schema import Schema from pyiceberg.serializers import ToOutputFile @@ -91,6 +89,9 @@ ) from pyiceberg.utils.datetime import datetime_to_millis +if TYPE_CHECKING: + from pyiceberg.io.pyarrow import PyArrowFile, PyArrowFileIO + def pytest_collection_modifyitems(items: List[pytest.Item]) -> None: for item in items: @@ -421,6 +422,8 @@ def example_table_metadata_v2() -> Dict[str, Any]: @pytest.fixture(scope="session") def metadata_location(tmp_path_factory: pytest.TempPathFactory) -> str: + from pyiceberg.io.pyarrow import PyArrowFileIO + metadata_location = str(tmp_path_factory.mktemp("metadata") / f"{uuid.uuid4()}.metadata.json") metadata = TableMetadataV2(**EXAMPLE_TABLE_METADATA_V2) ToOutputFile.table_metadata(metadata, PyArrowFileIO().new_output(location=metadata_location), overwrite=True) @@ -429,6 +432,8 @@ def metadata_location(tmp_path_factory: pytest.TempPathFactory) -> str: @pytest.fixture(scope="session") def metadata_location_gz(tmp_path_factory: pytest.TempPathFactory) -> str: + from pyiceberg.io.pyarrow import PyArrowFileIO + metadata_location = str(tmp_path_factory.mktemp("metadata") / f"{uuid.uuid4()}.gz.metadata.json") metadata = TableMetadataV2(**EXAMPLE_TABLE_METADATA_V2) ToOutputFile.table_metadata(metadata, PyArrowFileIO().new_output(location=metadata_location), overwrite=True) @@ -1146,7 +1151,9 @@ def __len__(self) -> int: def exists(self) -> bool: return os.path.exists(self._path) - def to_input_file(self) -> PyArrowFile: + def to_input_file(self) -> "PyArrowFile": + from pyiceberg.io.pyarrow import PyArrowFileIO + return PyArrowFileIO().new_input(location=self.location) def create(self, overwrite: bool = False) -> OutputStream: @@ -1399,7 +1406,9 @@ def fsspec_fileio_gcs(request: pytest.FixtureRequest) -> FsspecFileIO: @pytest.fixture -def pyarrow_fileio_gcs(request: pytest.FixtureRequest) -> PyArrowFileIO: +def pyarrow_fileio_gcs(request: pytest.FixtureRequest) -> "PyArrowFileIO": + from pyiceberg.io.pyarrow import PyArrowFileIO + properties = { GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"), GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"), @@ -1620,6 +1629,9 @@ def clean_up(test_catalog: Catalog) -> None: @pytest.fixture def data_file(table_schema_simple: Schema, tmp_path: str) -> str: + import pyarrow as pa + from pyarrow import parquet as pq + table = pa.table( {"foo": ["a", "b", "c"], "bar": [1, 2, 3], "baz": [True, False, None]}, metadata={"iceberg.schema": table_schema_simple.model_dump_json()},