From 745eb52a694e94c4324d9e368b5d830237e7269c Mon Sep 17 00:00:00 2001 From: kokokuo Date: Thu, 6 Jul 2023 14:38:01 +0800 Subject: [PATCH] feat(docs): add ClickHouse connector to docs package --- packages/doc/docs/connect/clickhouse.mdx | 82 +++++++++++++++++ packages/doc/docs/connect/overview.mdx | 22 +++-- packages/doc/docs/connectors.mdx | 1 + packages/doc/docs/connectors/clickhouse.mdx | 87 +++++++++++++++++++ .../extension-driver-clickhouse/README.md | 4 +- .../extension-driver-clickhouse/package.json | 4 +- .../src/lib/clickhouseDataSource.ts | 2 + 7 files changed, 188 insertions(+), 14 deletions(-) create mode 100644 packages/doc/docs/connect/clickhouse.mdx create mode 100644 packages/doc/docs/connectors/clickhouse.mdx diff --git a/packages/doc/docs/connect/clickhouse.mdx b/packages/doc/docs/connect/clickhouse.mdx new file mode 100644 index 000000000..aa4be78ec --- /dev/null +++ b/packages/doc/docs/connect/clickhouse.mdx @@ -0,0 +1,82 @@ +# ClickHouse + +## Installation + +1. Install package + + **If you are developing with binary, the package is already bundled in the binary. You can skip this step.** + + ```bash + npm i @vulcan-sql/extension-driver-clickhouse + ``` + +2. Update `vulcan.yaml`, and enable the extension. + + ```yaml + extensions: + ... + // highlight-next-line + ch: '@vulcan-sql/extension-driver-clickhouse' # Add this line + ``` + +3. Create a new profile in `profiles.yaml` or in your profile files. For example: + + ```yaml + - name: ch # profile name + type: clickhouse + connection: + host: www.example.com:8123 + request_timeout: 60000 + compression: + request: true + max_open_connections: 10 + username: user + password: pass + database: hello-clickhouse + allow: '*' + ``` + +## Configuration + +Please check [arguments of ClickHouse Client](https://clickhouse.com/docs/en/integrations/language-clients/nodejs) for further information. 
+ +| Name | Required | Default | Description | +| -------------------- | -------- | --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| host | N | http://localhost:8123 | ClickHouse instance URL. | +| request_timeout | N | 30000 | The request timeout in milliseconds. | +| max_open_connections | N | Infinity | Maximum number of sockets to allow per host. | +| compression | N | | Data applications operating with large datasets over the wire can benefit from enabling compression. Currently, only GZIP is supported using zlib. Please see [Compression docs](https://clickhouse.com/docs/en/integrations/language-clients/nodejs#compression). | +| username | N | default | The name of the user on whose behalf requests are made. | +| password | N | | The user password. | +| application | N | VulcanSQL | The name of the application using the Node.js client. | +| database | N | default | Database name to use. | +| clickhouse_settings | N | | ClickHouse settings to apply to all requests. Please see the [Advanced Settings](https://clickhouse.com/docs/en/operations/settings) and the [Definition](https://github.com/ClickHouse/clickhouse-js/blob/0.1.1/src/settings.ts). | +| tls | N | | Configure TLS certificates. Please see [TLS docs](https://clickhouse.com/docs/en/integrations/language-clients/nodejs#tls-certificates). | +| session_id | N | | ClickHouse Session ID to send with every request. | +| keep_alive | N | | HTTP Keep-Alive related settings. Please see [Keep Alive docs](https://clickhouse.com/docs/en/integrations/language-clients/nodejs#keep-alive). | + +The table above does not include the `log` option, because `log` requires defining a Logger class and assigning it, so it cannot be set through `profiles.yaml`. 
+ +## Note + +ClickHouse supports parameterized queries, which prevent SQL injection in the manner of prepared statements. Named placeholders are defined as `{name:type}`; please see [Query with Parameters](https://clickhouse.com/docs/en/integrations/language-clients/nodejs#queries-with-parameters). + +However, VulcanSQL API query parameters are in JSON format, so VulcanSQL cannot support [the full variety of ClickHouse types](https://clickhouse.com/docs/en/integrations/language-clients/nodejs#supported-clickhouse-data-types). VulcanSQL only converts the following types: + +- `boolean` to `Boolean` ClickHouse type +- `number` to `Int` or `Float` ClickHouse type +- `string` to `String` ClickHouse type + +Therefore, if you would like to query data of a special ClickHouse type, e.g. `Array(UInt8)`, `Record`, `Date`, `DateTime`, etc., you can use a ClickHouse [Regular Function](https://clickhouse.com/docs/en/sql-reference/functions) or [Type Conversion Function](https://clickhouse.com/docs/en/sql-reference/functions/type-conversion-functions) to convert the value. + +Example: + +```sql +-- If val from the API query parameter is '1990-11-01' and the born_date column is of type Date32 +-- For details on the toDate function, please see https://clickhouse.com/docs/en/sql-reference/functions/type-conversion-functions#todate +SELECT * FROM users WHERE born_date = toDate({val:String}); +``` + +## ⚠️ Caution + +The ClickHouse driver does not yet support the caching datasets feature. If you use the ClickHouse driver with the caching datasets feature, it will fail. 
diff --git a/packages/doc/docs/connect/overview.mdx b/packages/doc/docs/connect/overview.mdx index 2fb85b0d6..832f9eb1e 100644 --- a/packages/doc/docs/connect/overview.mdx +++ b/packages/doc/docs/connect/overview.mdx @@ -5,20 +5,24 @@ pagination_next: connect/bigquery # Overview We support the following data warehouses to connect with, you can choose multiple connectors in a single project, please check the connectors’ document for the installation guide. -* [PostgreSQL](./postgresql) -* [DuckDB](./duckdb) -* [Snowflake](./snowflake) -* [BigQuery](./bigquery) + +- [PostgreSQL](./postgresql) +- [DuckDB](./duckdb) +- [Snowflake](./snowflake) +- [BigQuery](./bigquery) +- [ClickHouse](./clickhouse) ## How to use + Setting up a data warehouse connector is easy, you can follow the steps below to set up a connector. 1. Once you install the connector package, update the `extensions` section in `vulcan.yaml` to include the connector package. For example: - ```yaml + + ```yaml extensions: - ... - // highlight-next-line - pg: '@vulcan-sql/extension-driver-pg' # Add this line + ... + // highlight-next-line + pg: '@vulcan-sql/extension-driver-pg' # Add this line ``` 2. Create a new profile in `profiles.yaml` or in your profile files. @@ -42,4 +46,4 @@ Setting up a data warehouse connector is easy, you can follow the steps below to - pg # profile name ``` - Then, you can query the data warehouse in your APIs. +Then, you can query the data warehouse in your APIs. 
diff --git a/packages/doc/docs/connectors.mdx b/packages/doc/docs/connectors.mdx index 6ccb2e9b7..d6dac1e46 100644 --- a/packages/doc/docs/connectors.mdx +++ b/packages/doc/docs/connectors.mdx @@ -8,6 +8,7 @@ We support the following data warehouses to connect with, you can choose multipl | [DuckDB](./connectors/duckdb) | ✅ Yes | ✅ Yes | ❌ No | | [Snowflake](./connectors/snowflake) | ✅ Yes | ✅ Yes | ❌ No | | [BigQuery](./connectors/bigquery) | ✅ Yes | ✅ Yes | ❌ No | +| [ClickHouse](./connectors/clickhouse) | ✅ Yes | ✅ Yes | ❌ No | \* Fetching rows only when we need them, it has better performance with large query results. diff --git a/packages/doc/docs/connectors/clickhouse.mdx b/packages/doc/docs/connectors/clickhouse.mdx new file mode 100644 index 000000000..ea4922705 --- /dev/null +++ b/packages/doc/docs/connectors/clickhouse.mdx @@ -0,0 +1,87 @@ +# ClickHouse + +Connect with your ClickHouse servers via the official [Node.js Driver](https://clickhouse.com/docs/en/integrations/language-clients/nodejs). + +## Installation + +1. Install package + + ```bash + npm i @vulcan-sql/extension-driver-clickhouse + ``` + + :::info + If you run VulcanSQL with Docker, you should use the command `vulcan-install @vulcan-sql/extension-driver-clickhouse` instead. + + ::: + +2. Update `vulcan.yaml`, and enable the extension. + + ```yaml + extensions: + ... + // highlight-next-line + ch: '@vulcan-sql/extension-driver-clickhouse' # Add this line + ``` + +3. Create a new profile in `profiles.yaml` or in your profile files. For example: + + ```yaml + - name: ch # profile name + type: clickhouse + connection: + host: www.example.com:8123 + request_timeout: 60000 + compression: + request: true + max_open_connections: 10 + username: user + password: pass + database: hello-clickhouse + allow: '*' + ``` + +## Configuration + +Please check [arguments of ClickHouse Client](https://clickhouse.com/docs/en/integrations/language-clients/nodejs) for further information. 
+ +| Name | Required | Default | Description | +| -------------------- | -------- | --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| host | N | http://localhost:8123 | ClickHouse instance URL. | +| request_timeout | N | 30000 | The request timeout in milliseconds. | +| max_open_connections | N | Infinity | Maximum number of sockets to allow per host. | +| compression | N | | Data applications operating with large datasets over the wire can benefit from enabling compression. Currently, only GZIP is supported using zlib. Please see [Compression docs](https://clickhouse.com/docs/en/integrations/language-clients/nodejs#compression). | +| username | N | default | The name of the user on whose behalf requests are made. | +| password | N | | The user password. | +| application | N | VulcanSQL | The name of the application using the Node.js client. | +| database | N | default | Database name to use. | +| clickhouse_settings | N | | ClickHouse settings to apply to all requests. Please see the [Advanced Settings](https://clickhouse.com/docs/en/operations/settings) and the [Definition](https://github.com/ClickHouse/clickhouse-js/blob/0.1.1/src/settings.ts). | +| tls | N | | Configure TLS certificates. Please see [TLS docs](https://clickhouse.com/docs/en/integrations/language-clients/nodejs#tls-certificates). | +| session_id | N | | ClickHouse Session ID to send with every request. | +| keep_alive | N | | HTTP Keep-Alive related settings. Please see [Keep Alive docs](https://clickhouse.com/docs/en/integrations/language-clients/nodejs#keep-alive). | + +The table above does not include the `log` option, because `log` requires defining a Logger class and assigning it, so it cannot be set through `profiles.yaml`. 
+ +## Note + +ClickHouse supports parameterized queries, which prevent SQL injection in the manner of prepared statements. Named placeholders are defined as `{name:type}`; please see [Query with Parameters](https://clickhouse.com/docs/en/integrations/language-clients/nodejs#queries-with-parameters). + +However, VulcanSQL API query parameters are in JSON format, so VulcanSQL cannot support [the full variety of ClickHouse types](https://clickhouse.com/docs/en/integrations/language-clients/nodejs#supported-clickhouse-data-types). VulcanSQL only converts the following types: + +- `boolean` to `Boolean` ClickHouse type +- `number` to `Int` or `Float` ClickHouse type +- `string` to `String` ClickHouse type + +Therefore, if you would like to query data of a special ClickHouse type, e.g. `Array(UInt8)`, `Record`, `Date`, `DateTime`, etc., you can use a ClickHouse [Regular Function](https://clickhouse.com/docs/en/sql-reference/functions) or [Type Conversion Function](https://clickhouse.com/docs/en/sql-reference/functions/type-conversion-functions) to convert the value. + +Example: + +```sql +-- If val from the API query parameter is '1990-11-01' and the born_date column is of type Date32 +-- For details on the toDate function, please see https://clickhouse.com/docs/en/sql-reference/functions/type-conversion-functions#todate +SELECT * FROM users WHERE born_date = toDate({val:String}); +``` + +## ⚠️ Caution + +The ClickHouse driver does not yet support the caching datasets feature. If you use the ClickHouse driver with the caching datasets feature, it will fail. diff --git a/packages/extension-driver-clickhouse/README.md b/packages/extension-driver-clickhouse/README.md index e25cee10d..30d9dac76 100644 --- a/packages/extension-driver-clickhouse/README.md +++ b/packages/extension-driver-clickhouse/README.md @@ -94,9 +94,7 @@ SELECT * FROM users WHERE born_date = toDate({val:String}); ## ⚠️ Caution -ClickHouse driver currently not yet support for caching datasets feature. 
- -If you use the ClickHouse driver and setup the cache options in API Schema yaml, it will throw error. +The ClickHouse driver does not yet support the caching datasets feature. If you use the ClickHouse driver with the caching datasets feature, it will fail. ## Testing diff --git a/packages/extension-driver-clickhouse/package.json b/packages/extension-driver-clickhouse/package.json index ff3f7c0cc..c37728927 100644 --- a/packages/extension-driver-clickhouse/package.json +++ b/packages/extension-driver-clickhouse/package.json @@ -15,8 +15,8 @@ "data-warehouse", "data-lake", "api-builder", - "snowflake", - "snow" + "clickhouse", + "ch" ], "repository": { "type": "git", diff --git a/packages/extension-driver-clickhouse/src/lib/clickhouseDataSource.ts b/packages/extension-driver-clickhouse/src/lib/clickhouseDataSource.ts index a43d4ece0..95d077107 100644 --- a/packages/extension-driver-clickhouse/src/lib/clickhouseDataSource.ts +++ b/packages/extension-driver-clickhouse/src/lib/clickhouseDataSource.ts @@ -120,6 +120,8 @@ export class ClickHouseDataSource extends DataSource { const [namesRow, typesRow, ...dataRows] = rows; names = JSON.parse(namesRow.text); types = JSON.parse(typesRow.text); + // The ClickHouse stream is only called once and returns all data rows in one chunk, so we need to push each row to the stream in a loop. + // Please see https://clickhouse.com/docs/en/integrations/language-clients/nodejs#resultset-and-row-abstractions dataRows.forEach((row) => dataRowStream.push(row.text)); }); await new Promise((resolve) => {