Skip to content

Commit

Permalink
feat: Clickhouse data source (#2300)
Browse files Browse the repository at this point in the history
Adds support for ClickHouse as a data source.

`read_clickhouse`:
```
> select * from read_clickhouse('clickhouse://localhost:9000/default', 'bikeshare_stations') limit 1;
┌────────────┬──────────┬────────┬─────────┬───┬─────────────────┬───────┬──────────────────┬───────────────────┐
│ station_id │ name     │ status │ address │ … │ footprint_width │ notes │ council_district │ modified_date     │
│         ── │ ──       │ ──     │ ──      │   │              ── │ ──    │               ── │ ──                │
│      Int32 │ Utf8     │ Utf8   │ Utf8    │   │         Float32 │ Utf8  │            Int32 │ Timestamp<s, UTC> │
╞════════════╪══════════╪════════╪═════════╪═══╪═════════════════╪═══════╪══════════════════╪═══════════════════╡
│          0 │ South C… │ active │ 1901 S… │ … │            10.0 │ In t… │                9 │ 2022-03-04T09:01… │
└────────────┴──────────┴────────┴─────────┴───┴─────────────────┴───────┴──────────────────┴───────────────────┘
```

External database:
```
> create external database ch
::: from clickhouse
::: options ( connection_string = 'clickhouse://localhost:9000/default' );
Database created
> select status, address from ch.default.bikeshare_stations limit 1;
┌────────┬──────────────────────────┐
│ status │ address                  │
│ ──     │ ──                       │
│ Utf8   │ Utf8                     │
╞════════╪══════════════════════════╡
│ active │ 1901 South Congress Ave. │
└────────┴──────────────────────────┘
```

External table:
```
> create external table stations
::: from clickhouse
::: options ( connection_string = 'clickhouse://localhost:9000/default',
:::           table = 'bikeshare_stations' );
Table created
> select council_district, modified_date from stations limit 1;
┌──────────────────┬─────────────────────┐
│ council_district │ modified_date       │
│               ── │ ──                  │
│            Int32 │ Timestamp<s, UTC>   │
╞══════════════════╪═════════════════════╡
│                9 │ 2022-03-04T09:01:00 │
└──────────────────┴─────────────────────┘
```

---

Follow-up items: #2315
  • Loading branch information
scsmithr authored Dec 28, 2023
1 parent ccb6fcf commit eaf795a
Show file tree
Hide file tree
Showing 29 changed files with 1,103 additions and 33 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,10 @@ jobs:
# Prepare SLT (MongoDB)
export MONGO_CONN_STRING=$(./scripts/create-test-mongo-db.sh)
# Prepare SLT (Clickhouse)
source ./scripts/ci-install-clickhouse.sh
export CLICKHOUSE_CONN_STRING=$(./scripts/create-test-clickhouse-db.sh)
# Prepare SLT (SQL Server)
export SQL_SERVER_CONN_STRING=$(./scripts/create-test-sqlserver-db.sh)
Expand Down Expand Up @@ -313,22 +317,23 @@ jobs:
just sql-logic-tests --protocol=rpc 'sqllogictests_native/*'
just sql-logic-tests --protocol=rpc 'sqllogictests_object_store/*'
just sql-logic-tests --protocol=rpc 'sqllogictests_sqlserver/*'
just sql-logic-tests --protocol=rpc 'sqllogictests_clickhouse/*'
just sql-logic-tests --protocol=rpc --exclude '*/tunnels/ssh' 'sqllogictests_mongodb/*'
just sql-logic-tests --protocol=rpc --exclude '*/tunnels/ssh' 'sqllogictests_mysql/*'
just sql-logic-tests --protocol=rpc --exclude '*/tunnels/ssh' 'sqllogictests_postgres/*'
echo "-------------------------- REMOTE DATA STORAGE TESTS --------------------------------"
# Test using a remote object store for storing databases and catalog
# MinIO (S3)
just sql-logic-tests --location http://localhost:9000 \
just sql-logic-tests --location http://localhost:9100 \
--option access_key_id=$MINIO_ACCESS_KEY \
--option secret_access_key=$MINIO_SECRET_KEY \
--option bucket=$TEST_BUCKET \
'sqllogictests/*' \
'sqllogictests_native/*'
# MinIO (S3) but with a sub-directory path
just slt -l http://localhost:9000/$TEST_BUCKET/path/to/folder \
just slt -l http://localhost:9100/$TEST_BUCKET/path/to/folder \
-o access_key_id=$MINIO_ACCESS_KEY \
-o secret_access_key=$MINIO_SECRET_KEY \
'sqllogictests/*'
Expand Down
52 changes: 52 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions crates/datasources/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ bitflags = "2.4"
bitvec = "1"
bytes = "1.4.0"
chrono = { workspace = true }
chrono-tz = "0.8.4"
clickhouse-rs = { version = "1.1.0-alpha.1"}
datafusion = { workspace = true }
decimal = { path = "../decimal" }
deltalake = { workspace = true }
Expand Down
13 changes: 13 additions & 0 deletions crates/datasources/src/clickhouse/errors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/// Errors produced by the Clickhouse data source.
///
/// Variants marked `#[from]` get a `From` conversion generated by
/// `thiserror`, so the wrapped error types can be propagated with `?`.
/// Variants marked `#[error(transparent)]` forward their `Display` output
/// and error source to the wrapped error instead of adding a message.
#[derive(Debug, thiserror::Error)]
pub enum ClickhouseError {
/// An error returned by the `clickhouse_rs` client library.
#[error(transparent)]
Clickhouse(#[from] clickhouse_rs::errors::Error),
/// A URL could not be parsed.
// NOTE(review): presumably raised when parsing the connection string — confirm against callers.
#[error(transparent)]
UrlParse(#[from] url::ParseError),
/// An error returned by Arrow (via DataFusion's re-export).
#[error(transparent)]
Arrow(#[from] datafusion::arrow::error::ArrowError),
/// A free-form error message; displayed exactly as the contained string.
#[error("{0}")]
String(String),
}

/// `Result` alias whose error type defaults to [`ClickhouseError`].
///
/// The error parameter can still be overridden by naming a second type argument.
pub type Result<T, E = ClickhouseError> = std::result::Result<T, E>;
Loading

0 comments on commit eaf795a

Please sign in to comment.