Upgrade DataFusion from 31.0.0 to 40.1.0 #163

Workflow file for this run

.github/workflows/regression.yml at 539daf4

	# Regression workflow: runs on manual dispatch, repository dispatch, and on
	# pull requests that touch anything other than docs or other workflow files.
	name: Regression
	on:
	  workflow_dispatch:
	  repository_dispatch:
	  pull_request:
	    paths-ignore:
	      - '**.md'
	      - '.github/workflows/**'
	      # Negated pattern: intended to re-include this workflow file so that
	      # edits to it still trigger a run. Path filters are case-sensitive,
	      # so the name must match the file on disk (regression.yml).
	      - '!.github/workflows/regression.yml'

	# Runs sharing a group cancel each other. The last segment evaluates to
	# `true` for every ref except master, where it evaluates to the commit SHA,
	# so each master commit gets its own group and is never cancelled by a
	# newer one.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/master' || github.sha }}
	  cancel-in-progress: true

	jobs:
	  # Runs the mini GroupBy and Join benchmarks once per solution, each on its
	  # own runner; fail-fast is off so one failing solution does not cancel the
	  # others.
	  regression-test-benchmark-runner-solo-solutions:
	    strategy:
	      fail-fast: false
	      matrix:
	        solution:
	          - data.table
	          - collapse
	          - dplyr
	          - pandas
	          - pydatatable
	          - spark
	          - juliadf
	          - juliads
	          - polars
	          - R-arrow
	          - duckdb
	          - datafusion
	          - dask
	          - clickhouse
	    name: Regression Tests solo solutions
	    # NOTE(review): the GitHub-hosted ubuntu-20.04 image has been retired.
	    # Confirm that the setup scripts and gcc-10/g++-10 work on a newer image
	    # before switching this label.
	    runs-on: ubuntu-20.04
	    env:
	      CC: gcc-10
	      CXX: g++-10
	      GEN: ninja

	    steps:
	      - uses: actions/checkout@v4
	        with:
	          fetch-depth: 0

	      - uses: actions/setup-python@v5
	        with:
	          python-version: '3.10'

	      - name: Install libraries
	        shell: bash
	        run: ./_utils/setup-small.sh

	      - name: Generate 500mb datasets
	        shell: bash
	        run: ./_utils/generate-data-small.sh

	      # -f: do not fail the job when a log file is already absent.
	      - name: Remove old logs
	        shell: bash
	        run: rm -f time.csv logs.csv

	      - name: Install all solutions
	        shell: bash
	        run: source path.env && python3 _utils/install_all_solutions.py ${{ matrix.solution }}

	      - name: Turn swap off
	        shell: bash
	        run: sudo swapoff -a

	      # needed because clickhouse for some reason produces an error the first
	      # time a benchmark is run. The next benchmark run will work and
	      # overwrite the old benchmark files.
	      - name: Run mini GroupBy benchmark if clickhouse
	        shell: bash
	        if: ${{ matrix.solution == 'clickhouse' }}
	        run: |
	          python3 _utils/prep_solutions.py --task=groupby --solution=clickhouse
	          source path.env
	          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh

	      - name: Run mini GroupBy benchmark
	        shell: bash
	        run: |
	          python3 _utils/prep_solutions.py --task=groupby --solution=${{ matrix.solution }}
	          source path.env
	          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh

	      - name: Run mini Join benchmark
	        shell: bash
	        run: |
	          python3 _utils/prep_solutions.py --task=join --solution=${{ matrix.solution }}
	          source path.env
	          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh

	      - name: Validate benchmark results and report generation
	        shell: bash
	        run: ./_utils/validate_no_errors.sh

	      # Runs even after a failure so the logs can be inspected; mkdir -p
	      # covers the case where an earlier step failed before out/ existed.
	      - name: Create Archive
	        if: always()
	        shell: bash
	        run: |
	          mkdir -p out
	          cp *.csv out/
	          zip -r ${{ matrix.solution }}-out.zip out/

	      # include this step to see what the latest versions are of every solution
	      - name: Print latest versions
	        if: always()
	        shell: bash
	        run: tail -n +1 */VERSION

	      # Artifact names must be unique within a run; the matrix value keeps
	      # each solution's archive distinct.
	      - uses: actions/upload-artifact@v4
	        if: always()
	        with:
	          name: ${{ matrix.solution }}-out.zip
	          path: ${{ matrix.solution }}-out.zip
	          if-no-files-found: error

	  # Runs the same mini benchmarks with every solution installed on a single
	  # runner; starts only after the per-solution job has succeeded.
	  regression-test-benchmark-runner-all-solutions:
	    needs: regression-test-benchmark-runner-solo-solutions
	    name: Regression Tests all solutions
	    # NOTE(review): the GitHub-hosted ubuntu-20.04 image has been retired.
	    # Confirm that the setup scripts and gcc-10/g++-10 work on a newer image
	    # before switching this label.
	    runs-on: ubuntu-20.04
	    env:
	      CC: gcc-10
	      CXX: g++-10
	      GEN: ninja

	    steps:
	      - uses: actions/checkout@v4
	        with:
	          fetch-depth: 0

	      - uses: actions/setup-python@v5
	        with:
	          python-version: '3.10'

	      - name: Install libraries
	        shell: bash
	        run: ./_utils/setup-small.sh

	      - name: Generate 500mb datasets
	        shell: bash
	        run: ./_utils/generate-data-small.sh

	      # -f: do not fail the job when a log file is already absent.
	      - name: Remove old logs
	        shell: bash
	        run: rm -f time.csv logs.csv

	      - name: Install all solutions
	        shell: bash
	        run: source path.env && python3 _utils/install_all_solutions.py all

	      - name: Turn swap off
	        shell: bash
	        run: sudo swapoff -a

	      - name: Run mini GroupBy benchmark
	        shell: bash
	        run: |
	          python3 _utils/prep_solutions.py --task=groupby --solution=all
	          source path.env
	          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh

	      - name: Run mini Join benchmark
	        shell: bash
	        run: |
	          python3 _utils/prep_solutions.py --task=join --solution=all
	          source path.env
	          TEST_RUN=true TEST_MOUNT_DIR=$GITHUB_WORKSPACE ./run.sh

	      - name: Validate benchmark results and report generation
	        shell: bash
	        run: ./_utils/validate_no_errors.sh

	      # Runs even after a failure so the logs can be inspected; mkdir -p
	      # covers the case where an earlier step failed before out/ existed.
	      - name: Create Archive
	        if: always()
	        shell: bash
	        run: |
	          mkdir -p out
	          cp *.csv out/
	          zip -r all-out.zip out/

	      # include this step to see what the latest versions are of every solution
	      - name: Print latest versions
	        if: always()
	        shell: bash
	        run: tail -n +1 */VERSION

	      - uses: actions/upload-artifact@v4
	        if: always()
	        with:
	          name: all-out.zip
	          path: all-out.zip
	          if-no-files-found: error

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Upgrade DataFusion from 31.0.0 to 40.1.0 #163

Workflow file

Upgrade DataFusion from 31.0.0 to 40.1.0 #163

Jobs

Run details

Workflow file for this run