# Upgrade DataFusion from 31.0.0 to 40.1.0 #163
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Regression workflow: triggers and concurrency settings.
name: Regression

# Runs on manual dispatch, on repository_dispatch events, and on pull requests.
# Pull requests are filtered with paths-ignore: Markdown files and files under
# .github/workflows/ are listed as ignored; the '!' entry is intended to
# re-include this workflow file itself.
on:
  workflow_dispatch:
  repository_dispatch:
  pull_request:
    paths-ignore:
      - '**.md'
      - '.github/workflows/**'
      - '!.github/workflows/Regression.yml'

# A newer run cancels an in-progress run that shares the same group key.
# The last segment of the key evaluates to `true` for every ref other than
# refs/heads/master and to the commit SHA on master, so runs on master get a
# distinct group per commit.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/master' || github.sha }}
  cancel-in-progress: true
jobs:
  # Installs and benchmarks each solution in isolation, one matrix entry per
  # solution. fail-fast is disabled so one failing solution does not cancel
  # the remaining matrix entries.
  regression-test-benchmark-runner-solo-solutions:
    strategy:
      fail-fast: false
      matrix:
        solution:
          - data.table
          - collapse
          - dplyr
          - pandas
          - pydatatable
          - spark
          - juliadf
          - juliads
          - polars
          - R-arrow
          - duckdb
          - datafusion
          - dask
          - clickhouse
    name: Regression Tests solo solutions
    # NOTE(review): the GitHub-hosted ubuntu-20.04 image has been retired;
    # confirm gcc-10/g++-10 and the setup scripts work on a newer image
    # before changing this label.
    runs-on: ubuntu-20.04
    env:
      CC: gcc-10
      CXX: g++-10
      GEN: ninja
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Install libraries
        shell: bash
        run: ./_utils/setup-small.sh
      - name: Generate 500mb datasets
        shell: bash
        run: ./_utils/generate-data-small.sh
      - name: Remove old logs
        shell: bash
        # -f: do not fail the job when a log file is already absent.
        run: rm -f time.csv logs.csv
      - name: Install all solutions
        shell: bash
        run: source path.env && python3 _utils/install_all_solutions.py ${{ matrix.solution }}
      - name: Turn swap off
        shell: bash
        run: sudo swapoff -a
      # needed because clickhouse for some reason produces an error the first
      # time a benchmark is run. The next benchmark run will work and overwrite the
      # old benchmark files.
      - name: Run mini GroupBy benchmark if clickhouse
        shell: bash
        # 'all' is not a value of this job's matrix, so only the clickhouse
        # comparison is needed here.
        if: ${{ matrix.solution == 'clickhouse' }}
        run: |
          python3 _utils/prep_solutions.py --task=groupby --solution=clickhouse
          source path.env
          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh
      - name: Run mini GroupBy benchmark
        shell: bash
        run: |
          python3 _utils/prep_solutions.py --task=groupby --solution=${{ matrix.solution }}
          source path.env
          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh
      - name: Run mini Join benchmark
        shell: bash
        run: |
          python3 _utils/prep_solutions.py --task=join --solution=${{ matrix.solution }}
          source path.env
          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh
      - name: Validate benchmark results and report generation
        shell: bash
        run: ./_utils/validate_no_errors.sh
      - name: Create Archive
        if: always()
        shell: bash
        # This step also runs after failures, so it must not rely on an
        # earlier step having created out/.
        run: |
          mkdir -p out
          cp *.csv out/
          zip -r ${{ matrix.solution }}-out.zip out/
      # include this step to see what the latest versions are of every solution
      - name: Print latest versions
        if: always()
        shell: bash
        run: tail -n +1 */VERSION
      # upload-artifact v3 is deprecated; v4 requires unique artifact names
      # per run, which the per-solution name already provides.
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: ${{ matrix.solution }}-out.zip
          path: ${{ matrix.solution }}-out.zip
          if-no-files-found: error

  # Installs every solution on a single runner and runs the same mini
  # benchmarks with --solution=all. Starts only after the solo matrix job
  # (declared via `needs`).
  regression-test-benchmark-runner-all-solutions:
    needs: regression-test-benchmark-runner-solo-solutions
    name: Regression Tests all solutions
    # NOTE(review): the GitHub-hosted ubuntu-20.04 image has been retired;
    # confirm gcc-10/g++-10 and the setup scripts work on a newer image
    # before changing this label.
    runs-on: ubuntu-20.04
    env:
      CC: gcc-10
      CXX: g++-10
      GEN: ninja
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Install libraries
        shell: bash
        run: ./_utils/setup-small.sh
      - name: Generate 500mb datasets
        shell: bash
        run: ./_utils/generate-data-small.sh
      - name: Remove old logs
        shell: bash
        # -f: do not fail the job when a log file is already absent.
        run: rm -f time.csv logs.csv
      - name: Install all solutions
        shell: bash
        run: source path.env && python3 _utils/install_all_solutions.py all
      - name: Turn swap off
        shell: bash
        run: sudo swapoff -a
      - name: Run mini GroupBy benchmark
        shell: bash
        run: |
          python3 _utils/prep_solutions.py --task=groupby --solution=all
          source path.env
          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh
      - name: Run mini Join benchmark
        shell: bash
        run: |
          python3 _utils/prep_solutions.py --task=join --solution=all
          source path.env
          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh
      - name: Validate benchmark results and report generation
        shell: bash
        run: ./_utils/validate_no_errors.sh
      - name: Create Archive
        if: always()
        shell: bash
        # This step also runs after failures, so it must not rely on an
        # earlier step having created out/.
        run: |
          mkdir -p out
          cp *.csv out/
          zip -r all-out.zip out/
      # include this step to see what the latest versions are of every solution
      - name: Print latest versions
        if: always()
        shell: bash
        run: tail -n +1 */VERSION
      # upload-artifact v3 is deprecated; v4 requires unique artifact names
      # per run, and all-out.zip does not collide with the per-solution names.
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: all-out.zip
          path: all-out.zip
          if-no-files-found: error