Consolidate Examples: memtable.rs and parquet_multiple_files.rs (#13913) #36384
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Licensed to the Apache Software Foundation (ASF) under one | |
# or more contributor license agreements. See the NOTICE file | |
# distributed with this work for additional information | |
# regarding copyright ownership. The ASF licenses this file | |
# to you under the Apache License, Version 2.0 (the | |
# "License"); you may not use this file except in compliance | |
# with the License. You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, | |
# software distributed under the License is distributed on an | |
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
# KIND, either express or implied. See the License for the | |
# specific language governing permissions and limitations | |
# under the License. | |
name: Rust | |
concurrency: | |
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} | |
cancel-in-progress: true | |
on: | |
push: | |
paths-ignore: | |
- "docs/**" | |
- "**.md" | |
- ".github/ISSUE_TEMPLATE/**" | |
- ".github/pull_request_template.md" | |
pull_request: | |
paths-ignore: | |
- "docs/**" | |
- "**.md" | |
- ".github/ISSUE_TEMPLATE/**" | |
- ".github/pull_request_template.md" | |
# manual trigger | |
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow | |
workflow_dispatch: | |
jobs: | |
# Check license header | |
license-header-check: | |
# Use a currently supported hosted runner label: the ubuntu-20.04 image has been | |
# retired by GitHub, so jobs pinned to it can no longer be scheduled. | |
runs-on: ubuntu-latest | |
name: Check License Header | |
steps: | |
- uses: actions/checkout@v4 | |
# Runs the hawkeye action against the checked-out sources | |
- uses: korandoru/hawkeye@v5 | |
# Check crate compiles and base cargo check passes | |
linux-build-lib: | |
name: linux build test | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Prepare cargo build | |
run: cargo check --profile ci --all-targets | |
# cargo check datafusion, common, functions and substrait with no default features, | |
# then check the whole workspace and the datafusion-cli Cargo.lock | |
linux-cargo-check-no-default-features: | |
name: cargo check no default features | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Check datafusion without default features | |
# Some of the test binaries require the parquet feature still | |
#run: cargo check --all-targets --no-default-features -p datafusion | |
run: cargo check --profile ci --no-default-features -p datafusion | |
- name: Check datafusion-common without default features | |
run: cargo check --profile ci --all-targets --no-default-features -p datafusion-common | |
- name: Check datafusion-functions without default features | |
run: cargo check --profile ci --all-targets --no-default-features -p datafusion-functions | |
- name: Check datafusion-substrait without default features | |
run: cargo check --profile ci --all-targets --no-default-features -p datafusion-substrait | |
- name: Check workspace in debug mode | |
run: cargo check --profile ci --all-targets --workspace | |
- name: Check workspace with avro,json features | |
run: cargo check --profile ci --workspace --benches --features avro,json | |
- name: Check Cargo.lock for datafusion-cli | |
run: | | |
# If this test fails, try running `cargo update` in the `datafusion-cli` directory | |
# and check in the updated Cargo.lock file. | |
cargo check --profile ci --manifest-path datafusion-cli/Cargo.toml --locked | |
# cargo check datafusion to ensure that the datafusion crate can be built with only a | |
# subset of the function packages enabled. | |
linux-cargo-check-datafusion: | |
name: cargo check datafusion | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Check datafusion (nested_expressions) | |
run: cargo check --profile ci --no-default-features --features=nested_expressions -p datafusion | |
- name: Check datafusion (crypto) | |
run: cargo check --profile ci --no-default-features --features=crypto_expressions -p datafusion | |
- name: Check datafusion (datetime_expressions) | |
run: cargo check --profile ci --no-default-features --features=datetime_expressions -p datafusion | |
- name: Check datafusion (encoding_expressions) | |
run: cargo check --profile ci --no-default-features --features=encoding_expressions -p datafusion | |
- name: Check datafusion (math_expressions) | |
run: cargo check --profile ci --no-default-features --features=math_expressions -p datafusion | |
- name: Check datafusion (regex_expressions) | |
run: cargo check --profile ci --no-default-features --features=regex_expressions -p datafusion | |
- name: Check datafusion (string_expressions) | |
run: cargo check --profile ci --no-default-features --features=string_expressions -p datafusion | |
# cargo check datafusion-functions to ensure that the datafusion-functions crate can be built with | |
# only a subset of the function packages enabled. | |
linux-cargo-check-datafusion-functions: | |
name: cargo check functions | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Check datafusion-functions (crypto) | |
run: cargo check --profile ci --all-targets --no-default-features --features=crypto_expressions -p datafusion-functions | |
- name: Check datafusion-functions (datetime_expressions) | |
run: cargo check --profile ci --all-targets --no-default-features --features=datetime_expressions -p datafusion-functions | |
- name: Check datafusion-functions (encoding_expressions) | |
run: cargo check --profile ci --all-targets --no-default-features --features=encoding_expressions -p datafusion-functions | |
- name: Check datafusion-functions (math_expressions) | |
run: cargo check --profile ci --all-targets --no-default-features --features=math_expressions -p datafusion-functions | |
- name: Check datafusion-functions (regex_expressions) | |
run: cargo check --profile ci --all-targets --no-default-features --features=regex_expressions -p datafusion-functions | |
- name: Check datafusion-functions (string_expressions) | |
run: cargo check --profile ci --all-targets --no-default-features --features=string_expressions -p datafusion-functions | |
# Run tests | |
linux-test: | |
name: cargo test (amd64) | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 1 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Run tests (excluding doctests) | |
run: cargo test --profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --workspace --lib --tests --bins --features avro,json,backtrace | |
- name: Verify Working Directory Clean | |
run: git diff --exit-code | |
linux-test-datafusion-cli: | |
name: cargo test datafusion-cli (amd64) | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 1 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Run tests (excluding doctests) | |
run: | | |
cd datafusion-cli | |
cargo test --profile ci --lib --tests --bins --all-features | |
- name: Verify Working Directory Clean | |
run: git diff --exit-code | |
linux-test-example: | |
name: cargo examples (amd64) | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 1 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Run examples | |
run: | | |
# test datafusion-sql examples | |
cargo run --profile ci --example sql | |
# test datafusion-examples | |
ci/scripts/rust_example.sh | |
- name: Verify Working Directory Clean | |
run: git diff --exit-code | |
# Run `cargo test --doc` (test documentation examples) | |
linux-test-doc: | |
name: cargo test doc (amd64) | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 1 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Run doctests | |
run: | | |
cargo test --profile ci --doc --features avro,json | |
cd datafusion-cli | |
cargo test --profile ci --doc --all-features | |
- name: Verify Working Directory Clean | |
run: git diff --exit-code | |
# Run `cargo doc` to ensure the rustdoc is clean | |
linux-rustdoc: | |
name: cargo doc | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Run cargo doc | |
run: ci/scripts/rust_docs.sh | |
linux-wasm-pack: | |
name: build with wasm-pack | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Install wasm-pack | |
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh | |
- name: Build with wasm-pack | |
working-directory: ./datafusion/wasmtest | |
run: wasm-pack build --dev | |
# verify that the benchmark queries return the correct results | |
verify-benchmark-results: | |
name: verify benchmark results (amd64) | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 1 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Generate benchmark data and expected query results | |
run: | | |
mkdir -p datafusion/sqllogictest/test_files/tpch/data | |
git clone https://github.com/databricks/tpch-dbgen.git | |
cd tpch-dbgen | |
make | |
./dbgen -f -s 0.1 | |
mv *.tbl ../datafusion/sqllogictest/test_files/tpch/data | |
- name: Verify that benchmark queries return expected results | |
run: | | |
# increase stack size to fix stack overflow | |
export RUST_MIN_STACK=20971520 | |
export TPCH_DATA=`realpath datafusion/sqllogictest/test_files/tpch/data` | |
cargo test plan_q --package datafusion-benchmarks --profile ci --features=ci -- --test-threads=1 | |
INCLUDE_TPCH=true cargo test --profile ci --package datafusion-sqllogictest --test sqllogictests | |
- name: Verify Working Directory Clean | |
run: git diff --exit-code | |
sqllogictest-postgres: | |
name: "Run sqllogictest with Postgres runner" | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
services: | |
postgres: | |
image: postgres:15 | |
env: | |
POSTGRES_PASSWORD: postgres | |
POSTGRES_DB: db_test | |
POSTGRES_INITDB_ARGS: --encoding=UTF-8 --lc-collate=C --lc-ctype=C | |
ports: | |
- 5432:5432 | |
options: >- | |
--health-cmd pg_isready | |
--health-interval 10s | |
--health-timeout 5s | |
--health-retries 5 | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 1 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Run sqllogictest | |
run: | | |
cd datafusion/sqllogictest | |
PG_COMPAT=true PG_URI="postgresql://postgres:postgres@$POSTGRES_HOST:$POSTGRES_PORT/db_test" cargo test --profile ci --features=postgres --test sqllogictests | |
env: | |
# use postgres for the host here because we have specified a container for the job | |
POSTGRES_HOST: postgres | |
POSTGRES_PORT: ${{ job.services.postgres.ports[5432] }} | |
# The Windows flow is temporarily commented out because its build runs enormously slowly. | |
# Waiting for the new Windows 2025 GitHub runner. | |
# Details: https://github.com/apache/datafusion/issues/13726 | |
# | |
# windows: | |
# name: cargo test (win64) | |
# runs-on: windows-latest | |
# steps: | |
# - uses: actions/checkout@v4 | |
# with: | |
# submodules: true | |
# - name: Setup Rust toolchain | |
# uses: ./.github/actions/setup-windows-builder | |
# - name: Run tests (excluding doctests) | |
# shell: bash | |
# run: | | |
# export PATH=$PATH:$HOME/d/protoc/bin | |
# cargo test --lib --tests --bins --features avro,json,backtrace | |
# cd datafusion-cli | |
# cargo test --lib --tests --bins --all-features | |
# The Intel Mac build is commented out because so few users would ever use it | |
# Details: https://github.com/apache/datafusion/issues/13846 | |
# macos: | |
# name: cargo test (macos) | |
# runs-on: macos-latest | |
# steps: | |
# - uses: actions/checkout@v4 | |
# with: | |
# submodules: true | |
# fetch-depth: 1 | |
# - name: Setup Rust toolchain | |
# uses: ./.github/actions/setup-macos-builder | |
# - name: Run tests (excluding doctests) | |
# shell: bash | |
# run: | | |
# cargo test --profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --workspace --lib --tests --bins --features avro,json,backtrace | |
# cd datafusion-cli | |
# cargo test --profile ci --lib --tests --bins --all-features | |
macos-aarch64: | |
name: cargo test (macos-aarch64) | |
runs-on: macos-14 | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 1 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-macos-aarch64-builder | |
- name: Run tests (excluding doctests) | |
shell: bash | |
run: | | |
cargo test --profile ci --lib --tests --bins --features avro,json,backtrace | |
cd datafusion-cli | |
cargo test --profile ci --lib --tests --bins --all-features | |
test-datafusion-pyarrow: | |
name: cargo test pyarrow (amd64) | |
needs: linux-build-lib | |
# The steps execute inside the container declared below, so the host only needs a | |
# currently supported runner label (the ubuntu-20.04 hosted image has been retired by GitHub). | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust:bullseye # Workaround https://github.com/actions/setup-python/issues/721 | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 1 | |
- uses: actions/setup-python@v5 | |
with: | |
python-version: "3.8" | |
- name: Install PyArrow | |
run: | | |
# Publish LIBRARY_PATH to the following steps via the Actions environment file; | |
# the file path is quoted so the redirect is safe even if it contains spaces. | |
echo "LIBRARY_PATH=$LD_LIBRARY_PATH" >> "$GITHUB_ENV" | |
python -m pip install pyarrow | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Run datafusion-common tests | |
run: cargo test --profile ci -p datafusion-common --features=pyarrow | |
vendor: | |
name: Verify Vendored Code | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Run gen | |
run: ./regen.sh | |
working-directory: ./datafusion/proto | |
- name: Verify workspace clean (if this fails, run ./datafusion/proto/regen.sh and check in results) | |
run: git diff --exit-code | |
check-fmt: | |
name: Check cargo fmt | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Run | |
run: | | |
echo '' > datafusion/proto/src/generated/datafusion.rs | |
ci/scripts/rust_fmt.sh | |
# Coverage job disabled due to | |
# https://github.com/apache/datafusion/issues/3678 | |
# coverage: | |
# name: coverage | |
# runs-on: ubuntu-latest | |
# steps: | |
# - uses: actions/checkout@v4 | |
# with: | |
# submodules: true | |
# - name: Install protobuf compiler | |
# shell: bash | |
# run: | | |
# mkdir -p $HOME/d/protoc | |
# cd $HOME/d/protoc | |
# export PROTO_ZIP="protoc-21.4-linux-x86_64.zip" | |
# curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP | |
# unzip $PROTO_ZIP | |
# export PATH=$PATH:$HOME/d/protoc/bin | |
# protoc --version | |
# - name: Setup Rust toolchain | |
# run: | | |
# rustup toolchain install stable | |
# rustup default stable | |
# rustup component add rustfmt clippy | |
# - name: Cache Cargo | |
# uses: actions/cache@v4 | |
# with: | |
# path: /home/runner/.cargo | |
# # this key is not equal because the user is different than on a container (runner vs github) | |
# key: cargo-coverage-cache3- | |
# - name: Run coverage | |
# run: | | |
# export PATH=$PATH:$HOME/d/protoc/bin | |
# rustup toolchain install stable | |
# rustup default stable | |
# cargo install --version 0.20.1 cargo-tarpaulin | |
# cargo tarpaulin --all --out Xml | |
# - name: Report coverage | |
# continue-on-error: true | |
# run: bash <(curl -s https://codecov.io/bash) | |
clippy: | |
name: clippy | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 1 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Install Clippy | |
run: rustup component add clippy | |
- name: Run clippy | |
run: ci/scripts/rust_clippy.sh | |
# Check answers are correct when hash values collide | |
hash-collisions: | |
name: cargo test hash collisions (amd64) | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 1 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Run tests | |
run: | | |
cd datafusion | |
cargo test --profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --exclude datafusion-sqllogictest --workspace --lib --tests --features=force_hash_collisions,avro | |
cargo-toml-formatting-checks: | |
name: check Cargo.toml formatting | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 1 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- name: Install taplo | |
run: cargo +stable install taplo-cli --version ^0.9 --locked | |
# if you encounter an error, try running 'taplo format' to fix the formatting automatically. | |
- name: Check Cargo.toml formatting | |
run: taplo format --check | |
config-docs-check: | |
name: check configs.md and ***_functions.md is up-to-date | |
needs: linux-build-lib | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
fetch-depth: 1 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
with: | |
rust-version: stable | |
- uses: actions/setup-node@v4 | |
with: | |
node-version: "20" | |
- name: Check if configs.md has been modified | |
run: | | |
# If you encounter an error, run './dev/update_config_docs.sh' and commit | |
./dev/update_config_docs.sh | |
git diff --exit-code | |
- name: Check if any of the ***_functions.md has been modified | |
run: | | |
# If you encounter an error, run './dev/update_function_docs.sh' and commit | |
./dev/update_function_docs.sh | |
git diff --exit-code | |
# Verify MSRV for the crates which are directly used by other projects: | |
# - datafusion | |
# - datafusion-substrait | |
# - datafusion-proto | |
# - datafusion-cli | |
msrv: | |
name: Verify MSRV (Min Supported Rust Version) | |
runs-on: ubuntu-latest | |
container: | |
image: amd64/rust | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Setup Rust toolchain | |
uses: ./.github/actions/setup-builder | |
- name: Install cargo-msrv | |
run: cargo install cargo-msrv | |
- name: Check datafusion | |
working-directory: datafusion/core | |
run: | | |
# If you encounter an error with any of the commands below it means | |
# your code or some crate in the dependency tree has a higher MSRV | |
# (Min Supported Rust Version) than the one specified in the | |
# `rust-version` key of `Cargo.toml`. | |
# | |
# To reproduce: | |
# 1. Install the version of Rust that is failing. Example: | |
# rustup install 1.80.1 | |
# 2. Run the command that failed with that version. Example: | |
# cargo +1.80.1 check -p datafusion | |
# | |
# To resolve, either: | |
# 1. Change your code to use older Rust features, | |
# 2. Revert dependency update | |
# 3. Update the MSRV version in `Cargo.toml` | |
# | |
# Please see the DataFusion Rust Version Compatibility Policy before | |
# updating Cargo.toml. You may have to update the code instead. | |
# https://github.com/apache/datafusion/blob/main/README.md#rust-version-compatibility-policy | |
cargo msrv --output-format json --log-target stdout verify | |
- name: Check datafusion-substrait | |
working-directory: datafusion/substrait | |
run: cargo msrv --output-format json --log-target stdout verify | |
- name: Check datafusion-proto | |
working-directory: datafusion/proto | |
run: cargo msrv --output-format json --log-target stdout verify | |
- name: Check datafusion-cli | |
working-directory: datafusion-cli | |
run: cargo msrv --output-format json --log-target stdout verify |