# TPC-H Benchmarks #82
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Manually dispatched TPC-H benchmark run. The operator chooses a scale factor
# and, via one boolean input per engine, which engines are benchmarked.
name: TPC-H Benchmarks
on:
  workflow_dispatch:
    inputs:
      scale:
        description: 'Scale Factor'
        required: true
        # Choice values are quoted so they are unambiguously strings; the
        # value is interpolated as text into a file name and a CLI argument.
        default: '10000'
        type: choice
        options:
          - '10000'
          - '1000'
          - '100'
          - '10'
          - '1'
      dask:
        description: 'Dask'
        required: true
        default: true
        type: boolean
      duckdb:
        description: 'DuckDB'
        required: true
        default: true
        type: boolean
      # Polars is the only engine that is off by default.
      polars:
        description: 'Polars'
        required: true
        default: false
        type: boolean
      pyspark:
        description: 'PySpark'
        required: true
        default: true
        type: boolean
# Default settings applied to every `run` step in this workflow.
defaults:
  run:
    # Required shell entrypoint to have properly activated conda environments
    # (a login shell, so the profile scripts that activate conda are sourced).
    shell: bash -l {0}
jobs:
  # Single job: build the conda environment, assemble the list of test modules
  # from the dispatch inputs, run pytest, then upload the results.
  tpch:
    name: TPC-H
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up environment
        uses: conda-incubator/setup-miniconda@v3
        with:
          # NOTE(review): conda-forge has deprecated the Mambaforge installer
          # in favour of Miniforge3 - confirm this variant still resolves.
          miniforge-variant: Mambaforge
          use-mamba: true
          condarc-file: ci/condarc
          python-version: "3.9"
          environment-file: ci/environment.yml

      # The three updates below layer extra environment files on top of the
      # base environment created above, in this order.
      - name: Add TPC-H non-dask dependencies
        run: mamba env update --file ci/environment-tpch-nondask.yml

      - name: Upgrade dask to git tip
        run: mamba env update --file ci/environment-git-tip.yml

      - name: Add test dependencies
        run: mamba env update --file ci/environment-test.yml

      - name: Dump environment
        run: |
          # For debugging
          echo -e "--\n--Conda Environment (re-create this with \`conda env create --name <name> -f <output_file>\`)\n--"
          # `tee` keeps the export visible in the log and also writes the file
          # that the upload step at the end of this job lists as an artifact.
          mamba env export | grep -E -v '^prefix:.*$' | tee mamba_env_export.yml

      # Each enabled engine appends its test module to PYTEST_BENCHMARKS via
      # $GITHUB_ENV, so the value accumulates across these four steps.
      # NOTE(review): if no engine is enabled PYTEST_BENCHMARKS stays empty and
      # pytest below receives no explicit test paths - confirm that is intended.
      - name: Add Dask to benchmark if enabled
        if: ${{ inputs.dask }}
        run: |
          echo PYTEST_BENCHMARKS="${{ env.PYTEST_BENCHMARKS }} tests/tpch/test_dask.py" >> "$GITHUB_ENV"

      - name: Add DuckDB to benchmark if enabled
        if: ${{ inputs.duckdb }}
        run: |
          echo PYTEST_BENCHMARKS="${{ env.PYTEST_BENCHMARKS }} tests/tpch/test_duckdb.py" >> "$GITHUB_ENV"

      - name: Add Polars to benchmark if enabled
        if: ${{ inputs.polars }}
        run: |
          echo PYTEST_BENCHMARKS="${{ env.PYTEST_BENCHMARKS }} tests/tpch/test_polars.py" >> "$GITHUB_ENV"

      - name: Add PySpark to benchmark if enabled
        if: ${{ inputs.pyspark }}
        run: |
          echo PYTEST_BENCHMARKS="${{ env.PYTEST_BENCHMARKS }} tests/tpch/test_pyspark.py" >> "$GITHUB_ENV"

      # Runs every module collected above, Polars included when it is enabled.
      - name: Run TPC-H benchmarks
        env:
          DASK_COILED__TOKEN: ${{ secrets.COILED_BENCHMARK_BOT_TOKEN }}
          AWS_ACCESS_KEY_ID: ${{ secrets.RUNTIME_CI_BOT_AWS_ACCESS_KEY_ID }}
          AWS_DEFAULT_REGION: us-east-2  # this is needed for boto for some reason
          AWS_SECRET_ACCESS_KEY: ${{ secrets.RUNTIME_CI_BOT_AWS_SECRET_ACCESS_KEY }}
          # NOTE(review): no `strategy.matrix` is visible in this job, so this
          # expression presumably evaluates to an empty string - confirm
          # whether the variable is still needed.
          COILED_RUNTIME_VERSION: ${{ matrix.runtime-version }}
          DB_NAME: tpch_${{ inputs.scale }}.db
          CLUSTER_DUMP: always
          # Quoted so the literal text `True` reaches the process instead of a
          # YAML boolean that the runner would have to stringify.
          DASK_DATAFRAME__QUERY_PLANNING: "True"
        run: |
          pytest --benchmark \
            ${{ env.PYTEST_BENCHMARKS }} \
            -n 4 --dist loadscope \
            --scale ${{ inputs.scale }}

      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        # Upload even when the benchmark step failed.
        if: always()
        with:
          name: tpch-benchmark
          path: |
            tpch_${{ inputs.scale }}.db
            mamba_env_export.yml