From 3a9352d645a390bc7b67d1adeb61bb52c6b3380d Mon Sep 17 00:00:00 2001 From: tycho garen Date: Wed, 20 Mar 2024 17:32:01 -0400 Subject: [PATCH 01/34] chore: rename glaredb -> cli crate --- Cargo.lock | 94 +++++++++---------- bindings/nodejs/Cargo.toml | 12 +-- bindings/python/Cargo.toml | 4 +- crates/bench_runner/Cargo.toml | 2 +- crates/bench_runner/src/main.rs | 2 +- crates/{glaredb => cli}/Cargo.toml | 2 +- crates/{glaredb => cli}/build.rs | 0 crates/{glaredb => cli}/src/args/local.rs | 0 crates/{glaredb => cli}/src/args/mod.rs | 0 crates/{glaredb => cli}/src/args/server.rs | 0 crates/{glaredb => cli}/src/args/slt.rs | 0 crates/{glaredb => cli}/src/bin/main.rs | 0 crates/{glaredb => cli}/src/commands.rs | 0 crates/{glaredb => cli}/src/highlighter.rs | 0 crates/{glaredb => cli}/src/lib.rs | 0 crates/{glaredb => cli}/src/local.rs | 0 crates/{glaredb => cli}/src/metastore.rs | 0 crates/{glaredb => cli}/src/prompt.rs | 0 crates/{glaredb => cli}/src/proxy/mod.rs | 0 crates/{glaredb => cli}/src/proxy/pg.rs | 0 crates/{glaredb => cli}/src/proxy/rpc.rs | 0 crates/{glaredb => cli}/src/server.rs | 0 .../tests/drop_tables_test.rs | 0 crates/{glaredb => cli}/tests/iss_2309.rs | 0 .../{glaredb => cli}/tests/local_args_test.rs | 0 .../{glaredb => cli}/tests/log_file_test.rs | 0 crates/{glaredb => cli}/tests/logging_test.rs | 0 .../tests/output_mode_test.rs | 0 .../tests/server_args_test.rs | 0 crates/{glaredb => cli}/tests/setup.rs | 0 crates/{glaredb => cli}/tests/version_test.rs | 0 flake.nix | 2 +- 32 files changed, 56 insertions(+), 62 deletions(-) rename crates/{glaredb => cli}/Cargo.toml (98%) rename crates/{glaredb => cli}/build.rs (100%) rename crates/{glaredb => cli}/src/args/local.rs (100%) rename crates/{glaredb => cli}/src/args/mod.rs (100%) rename crates/{glaredb => cli}/src/args/server.rs (100%) rename crates/{glaredb => cli}/src/args/slt.rs (100%) rename crates/{glaredb => cli}/src/bin/main.rs (100%) rename crates/{glaredb => cli}/src/commands.rs (100%) rename crates/{glaredb => cli}/src/highlighter.rs (100%) rename crates/{glaredb => cli}/src/lib.rs (100%) rename crates/{glaredb => cli}/src/local.rs (100%) rename crates/{glaredb => cli}/src/metastore.rs (100%) rename crates/{glaredb => cli}/src/prompt.rs (100%) rename crates/{glaredb => cli}/src/proxy/mod.rs (100%) rename crates/{glaredb => cli}/src/proxy/pg.rs (100%) rename crates/{glaredb => cli}/src/proxy/rpc.rs (100%) rename crates/{glaredb => cli}/src/server.rs (100%) rename crates/{glaredb => cli}/tests/drop_tables_test.rs (100%) rename crates/{glaredb => cli}/tests/iss_2309.rs (100%) rename crates/{glaredb => cli}/tests/local_args_test.rs (100%) rename crates/{glaredb => cli}/tests/log_file_test.rs (100%) rename crates/{glaredb => cli}/tests/logging_test.rs (100%) rename crates/{glaredb => cli}/tests/output_mode_test.rs (100%) rename crates/{glaredb => cli}/tests/server_args_test.rs (100%) rename crates/{glaredb => cli}/tests/setup.rs (100%) rename crates/{glaredb => cli}/tests/version_test.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 9cb979ba9..5c0b2c19a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1095,7 +1095,7 @@ dependencies = [ "anyhow", "async-trait", "clap", - "glaredb", + "cli", "glob", "logutil", "pgsrv", @@ -1613,6 +1613,51 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +[[package]] +name = "cli" +version = "0.9.2" +dependencies = [ + "anyhow", + "arrow_util", + "assert_cmd", + "atty", + 
"built", + "clap", + "colored", + "console-subscriber", + "datafusion", + "datafusion_ext", + "futures", + "glob", + "ioutil", + "logutil", + "lzma-sys", + "metastore", + "nu-ansi-term 0.50.0", + "num_cpus", + "object_store", + "object_store_util", + "pgrepr", + "pgsrv", + "predicates", + "protogen", + "proxyutil", + "reedline", + "rpcsrv", + "slt", + "sqlbuiltins", + "sqlexec", + "telemetry", + "tempfile", + "terminal_util", + "tokio", + "tokio-postgres", + "tonic", + "tracing", + "url", + "uuid", +] + [[package]] name = "cmake" version = "0.1.50" @@ -3472,51 +3517,6 @@ dependencies = [ "url", ] -[[package]] -name = "glaredb" -version = "0.9.2" -dependencies = [ - "anyhow", - "arrow_util", - "assert_cmd", - "atty", - "built", - "clap", - "colored", - "console-subscriber", - "datafusion", - "datafusion_ext", - "futures", - "glob", - "ioutil", - "logutil", - "lzma-sys", - "metastore", - "nu-ansi-term 0.50.0", - "num_cpus", - "object_store", - "object_store_util", - "pgrepr", - "pgsrv", - "predicates", - "protogen", - "proxyutil", - "reedline", - "rpcsrv", - "slt", - "sqlbuiltins", - "sqlexec", - "telemetry", - "tempfile", - "terminal_util", - "tokio", - "tokio-postgres", - "tonic", - "tracing", - "url", - "uuid", -] - [[package]] name = "glob" version = "0.3.1" @@ -3991,7 +3991,6 @@ dependencies = [ "datafusion", "datafusion_ext", "futures", - "glaredb", "ioutil", "lzma-sys", "metastore", @@ -6262,7 +6261,6 @@ dependencies = [ "datafusion", "datafusion_ext", "futures", - "glaredb", "ioutil", "lzma-sys", "metastore", diff --git a/bindings/nodejs/Cargo.toml b/bindings/nodejs/Cargo.toml index f99f90000..7301954df 100644 --- a/bindings/nodejs/Cargo.toml +++ b/bindings/nodejs/Cargo.toml @@ -10,7 +10,6 @@ crate-type = ["cdylib"] workspace = true [dependencies] -# Default enable napi4 feature, see https://nodejs.org/api/n-api.html#node-api-version-matrix ioutil = { path = "../../crates/ioutil" } sqlexec = { path = "../../crates/sqlexec" } metastore = { path = "../../crates/metastore" } @@ -19,7 +18,6 @@ pgsrv = { path = "../../crates/pgsrv" } pgrepr = { path = "../../crates/pgrepr" } datafusion_ext = { path = "../../crates/datafusion_ext" } arrow_util = { path = "../../crates/arrow_util" } -glaredb = { path = "../../crates/glaredb" } terminal_util = { path = "../../crates/terminal_util" } futures = { workspace = true } datafusion.workspace = true @@ -27,15 +25,13 @@ thiserror = { workspace = true } url = { workspace = true } anyhow = { workspace = true } async-trait = { workspace = true } -serde.workspace = true -serde_json.workspace = true +serde = { workspace = true } +serde_json = { workspace = true } +napi = { version = "2.16.0", default-features = false, features = ["full"] } # Default enable napi4 feature, see https://nodejs.org/api/n-api.html#node-api-version-matrix +lzma-sys = { version = "*", features = ["static"] } # Prevent dynamic linking of lzma, which comes from datafusion once_cell = "1.19.0" bytes = "1.5.0" napi-derive = "2.16.0" -napi = { version = "2.16.0", default-features = false, features = ["full"] } - -# Prevent dynamic linking of lzma, which comes from datafusion -lzma-sys = { version = "*", features = ["static"] } [build-dependencies] napi-build = "2.1.2" diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 0aabd76bf..25405307b 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -27,7 +27,7 @@ pgsrv = { path = "../../crates/pgsrv" } pgrepr = { path = "../../crates/pgrepr" } datafusion_ext = { path = 
"../../crates/datafusion_ext" } arrow_util = { path = "../../crates/arrow_util" } -glaredb = { path = "../../crates/glaredb" } +# glaredb = { path = "../../crates/glaredb" } terminal_util = { path = "../../crates/terminal_util" } futures = { workspace = true } uuid = "1.7.0" @@ -35,4 +35,4 @@ async-trait = { workspace = true } once_cell = "1.19.0" # Prevent dynamic linking of lzma, which comes from datafusion -lzma-sys = { version = "*", features = ["static"] } \ No newline at end of file +lzma-sys = { version = "*", features = ["static"] } diff --git a/crates/bench_runner/Cargo.toml b/crates/bench_runner/Cargo.toml index 4820e58e0..8fe9e2460 100644 --- a/crates/bench_runner/Cargo.toml +++ b/crates/bench_runner/Cargo.toml @@ -8,7 +8,7 @@ workspace = true [dependencies] logutil = {path = "../logutil"} -glaredb = {path = "../glaredb"} +cli = {path = "../cli"} pgsrv = {path = "../pgsrv"} anyhow = { workspace = true } async-trait = { workspace = true } diff --git a/crates/bench_runner/src/main.rs b/crates/bench_runner/src/main.rs index 73a8832dd..555145b96 100644 --- a/crates/bench_runner/src/main.rs +++ b/crates/bench_runner/src/main.rs @@ -4,7 +4,7 @@ use std::time::{Duration, SystemTime}; use anyhow::Result; use clap::Parser; -use glaredb::server::ComputeServer; +use cli::server::ComputeServer; use glob::glob; use pgsrv::auth::SingleUserAuthenticator; use tokio::net::TcpListener; diff --git a/crates/glaredb/Cargo.toml b/crates/cli/Cargo.toml similarity index 98% rename from crates/glaredb/Cargo.toml rename to crates/cli/Cargo.toml index b6e8cbc43..c895c98a4 100644 --- a/crates/glaredb/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "glaredb" +name = "cli" version = { workspace = true } edition = { workspace = true } diff --git a/crates/glaredb/build.rs b/crates/cli/build.rs similarity index 100% rename from crates/glaredb/build.rs rename to crates/cli/build.rs diff --git a/crates/glaredb/src/args/local.rs b/crates/cli/src/args/local.rs similarity index 100% rename from crates/glaredb/src/args/local.rs rename to crates/cli/src/args/local.rs diff --git a/crates/glaredb/src/args/mod.rs b/crates/cli/src/args/mod.rs similarity index 100% rename from crates/glaredb/src/args/mod.rs rename to crates/cli/src/args/mod.rs diff --git a/crates/glaredb/src/args/server.rs b/crates/cli/src/args/server.rs similarity index 100% rename from crates/glaredb/src/args/server.rs rename to crates/cli/src/args/server.rs diff --git a/crates/glaredb/src/args/slt.rs b/crates/cli/src/args/slt.rs similarity index 100% rename from crates/glaredb/src/args/slt.rs rename to crates/cli/src/args/slt.rs diff --git a/crates/glaredb/src/bin/main.rs b/crates/cli/src/bin/main.rs similarity index 100% rename from crates/glaredb/src/bin/main.rs rename to crates/cli/src/bin/main.rs diff --git a/crates/glaredb/src/commands.rs b/crates/cli/src/commands.rs similarity index 100% rename from crates/glaredb/src/commands.rs rename to crates/cli/src/commands.rs diff --git a/crates/glaredb/src/highlighter.rs b/crates/cli/src/highlighter.rs similarity index 100% rename from crates/glaredb/src/highlighter.rs rename to crates/cli/src/highlighter.rs diff --git a/crates/glaredb/src/lib.rs b/crates/cli/src/lib.rs similarity index 100% rename from crates/glaredb/src/lib.rs rename to crates/cli/src/lib.rs diff --git a/crates/glaredb/src/local.rs b/crates/cli/src/local.rs similarity index 100% rename from crates/glaredb/src/local.rs rename to crates/cli/src/local.rs diff --git a/crates/glaredb/src/metastore.rs 
b/crates/cli/src/metastore.rs similarity index 100% rename from crates/glaredb/src/metastore.rs rename to crates/cli/src/metastore.rs diff --git a/crates/glaredb/src/prompt.rs b/crates/cli/src/prompt.rs similarity index 100% rename from crates/glaredb/src/prompt.rs rename to crates/cli/src/prompt.rs diff --git a/crates/glaredb/src/proxy/mod.rs b/crates/cli/src/proxy/mod.rs similarity index 100% rename from crates/glaredb/src/proxy/mod.rs rename to crates/cli/src/proxy/mod.rs diff --git a/crates/glaredb/src/proxy/pg.rs b/crates/cli/src/proxy/pg.rs similarity index 100% rename from crates/glaredb/src/proxy/pg.rs rename to crates/cli/src/proxy/pg.rs diff --git a/crates/glaredb/src/proxy/rpc.rs b/crates/cli/src/proxy/rpc.rs similarity index 100% rename from crates/glaredb/src/proxy/rpc.rs rename to crates/cli/src/proxy/rpc.rs diff --git a/crates/glaredb/src/server.rs b/crates/cli/src/server.rs similarity index 100% rename from crates/glaredb/src/server.rs rename to crates/cli/src/server.rs diff --git a/crates/glaredb/tests/drop_tables_test.rs b/crates/cli/tests/drop_tables_test.rs similarity index 100% rename from crates/glaredb/tests/drop_tables_test.rs rename to crates/cli/tests/drop_tables_test.rs diff --git a/crates/glaredb/tests/iss_2309.rs b/crates/cli/tests/iss_2309.rs similarity index 100% rename from crates/glaredb/tests/iss_2309.rs rename to crates/cli/tests/iss_2309.rs diff --git a/crates/glaredb/tests/local_args_test.rs b/crates/cli/tests/local_args_test.rs similarity index 100% rename from crates/glaredb/tests/local_args_test.rs rename to crates/cli/tests/local_args_test.rs diff --git a/crates/glaredb/tests/log_file_test.rs b/crates/cli/tests/log_file_test.rs similarity index 100% rename from crates/glaredb/tests/log_file_test.rs rename to crates/cli/tests/log_file_test.rs diff --git a/crates/glaredb/tests/logging_test.rs b/crates/cli/tests/logging_test.rs similarity index 100% rename from crates/glaredb/tests/logging_test.rs rename to crates/cli/tests/logging_test.rs diff --git a/crates/glaredb/tests/output_mode_test.rs b/crates/cli/tests/output_mode_test.rs similarity index 100% rename from crates/glaredb/tests/output_mode_test.rs rename to crates/cli/tests/output_mode_test.rs diff --git a/crates/glaredb/tests/server_args_test.rs b/crates/cli/tests/server_args_test.rs similarity index 100% rename from crates/glaredb/tests/server_args_test.rs rename to crates/cli/tests/server_args_test.rs diff --git a/crates/glaredb/tests/setup.rs b/crates/cli/tests/setup.rs similarity index 100% rename from crates/glaredb/tests/setup.rs rename to crates/cli/tests/setup.rs diff --git a/crates/glaredb/tests/version_test.rs b/crates/cli/tests/version_test.rs similarity index 100% rename from crates/glaredb/tests/version_test.rs rename to crates/cli/tests/version_test.rs diff --git a/flake.nix b/flake.nix index 7a55cee7a..6db0f3dcc 100644 --- a/flake.nix +++ b/flake.nix @@ -19,7 +19,7 @@ src = ./.; doCheck = false; nativeBuildInputs = with pkgs; [ protobuf ]; - buildAndTestSubdir = "crates/glaredb"; + buildAndTestSubdir = "crates/cli"; preBuild = '' export PROTOC=${pkgs.protobuf}/bin/protoc ''; From 0cc08ec9a81c6ef234bf093a232282d773c427b0 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Tue, 26 Mar 2024 17:36:57 -0400 Subject: [PATCH 02/34] chore: engine constructor --- bindings/nodejs/src/connection.rs | 26 ++++++++++++++------------ bindings/python/src/connect.rs | 22 +++++++++++----------- bindings/python/src/connection.rs | 4 ++-- crates/cli/src/bin/main.rs | 4 ++-- crates/sqlexec/src/engine.rs 
| 19 +++++++++++++++++++ 5 files changed, 48 insertions(+), 27 deletions(-) diff --git a/bindings/nodejs/src/connection.rs b/bindings/nodejs/src/connection.rs index aa8487501..b11294bf9 100644 --- a/bindings/nodejs/src/connection.rs +++ b/bindings/nodejs/src/connection.rs @@ -6,7 +6,7 @@ use datafusion::logical_expr::LogicalPlan as DFLogicalPlan; use datafusion_ext::vars::SessionVars; use futures::lock::Mutex; use ioutil::ensure_dir; -use sqlexec::engine::{Engine, SessionStorageConfig, TrackedSession}; +use sqlexec::engine::{Engine, EngineBackend, SessionStorageConfig, TrackedSession}; use sqlexec::remote::client::{RemoteClient, RemoteClientType}; use sqlexec::{LogicalPlan, OperationInfo}; use url::Url; @@ -66,19 +66,21 @@ impl Connection { ) -> napi::Result { let conf = JsSessionConf::from(data_dir_or_cloud_url); - let mut engine = if let Some(location) = location { - // TODO: try to consolidate with --data-dir option - Engine::from_storage_options(&location, &storage_options.unwrap_or_default()) - .await - .map_err(JsGlareDbError::from)? + let backend = if let Some(location) = location.clone() { + EngineBackend::Remote { + location, + options: storage_options.unwrap_or_default(), + } + } else if let Some(data_dir) = conf.data_dir.clone() { + EngineBackend::Local(data_dir) } else { - // If data dir is provided, then both table storage and metastore - // storage will reside at that path. Otherwise everything is in memory. - Engine::from_data_dir(conf.data_dir.as_ref()) - .await - .map_err(JsGlareDbError::from)? + EngineBackend::Memory }; + let mut engine = Engine::from_backend(backend) + .await + .map_err(JsGlareDbError::from)?; + // If spill path not provided, default to some tmp dir. let spill_path = match spill_path { Some(p) => { @@ -134,7 +136,7 @@ impl Connection { /// return the same connection. #[napi(catch_unwind)] pub async fn default_in_memory() -> napi::Result { - let engine = Engine::from_data_dir(None) + let engine = Engine::from_backend(EngineBackend::Memory) .await .map_err(JsGlareDbError::from)?; let sess = engine diff --git a/bindings/python/src/connect.rs b/bindings/python/src/connect.rs index 38a493b15..d8d2fe606 100644 --- a/bindings/python/src/connect.rs +++ b/bindings/python/src/connect.rs @@ -11,7 +11,7 @@ use datafusion_ext::vars::SessionVars; use futures::lock::Mutex; use ioutil::ensure_dir; use pyo3::prelude::*; -use sqlexec::engine::{Engine, SessionStorageConfig}; +use sqlexec::engine::{Engine, EngineBackend, SessionStorageConfig}; use sqlexec::remote::client::{RemoteClient, RemoteClientType}; use url::Url; @@ -91,19 +91,19 @@ pub fn connect( wait_for_future(py, async move { let conf = PythonSessionConf::from(data_dir_or_cloud_url); - let mut engine = if let Some(location) = location { - // TODO: try to consolidate with --data-dir option - Engine::from_storage_options(&location, &storage_options.unwrap_or_default()) - .await - .map_err(PyGlareDbError::from)? + let backend = if let Some(location) = location.clone() { + EngineBackend::Remote { + location, + options: storage_options.unwrap_or_default(), + } + } else if let Some(data_dir) = conf.data_dir.clone() { + EngineBackend::Local(data_dir) } else { - // If data dir is provided, then both table storage and metastore - // storage will reside at that path. Otherwise everything is in memory. - Engine::from_data_dir(conf.data_dir.as_ref()) - .await - .map_err(PyGlareDbError::from)? 
+ EngineBackend::Memory }; + let mut engine = Engine::from_backend(backend).await?; + // If spill path not provided, default to some tmp dir. let spill_path = match spill_path { Some(p) => { diff --git a/bindings/python/src/connection.rs b/bindings/python/src/connection.rs index 60c44b4aa..e16a13648 100644 --- a/bindings/python/src/connection.rs +++ b/bindings/python/src/connection.rs @@ -6,7 +6,7 @@ use futures::lock::Mutex; use once_cell::sync::OnceCell; use pyo3::prelude::*; use pyo3::types::PyType; -use sqlexec::engine::{Engine, SessionStorageConfig, TrackedSession}; +use sqlexec::engine::{Engine, EngineBackend, SessionStorageConfig, TrackedSession}; use sqlexec::{LogicalPlan, OperationInfo}; use crate::execution_result::PyExecutionResult; @@ -35,7 +35,7 @@ impl Connection { let con = DEFAULT_CON.get_or_try_init(|| { wait_for_future(py, async move { - let engine = Engine::from_data_dir(None).await?; + let engine = Engine::from_backend(EngineBackend::Memory).await?; let sess = engine .new_local_session_context( SessionVars::default(), diff --git a/crates/cli/src/bin/main.rs b/crates/cli/src/bin/main.rs index 8199d20b3..5a90840b9 100644 --- a/crates/cli/src/bin/main.rs +++ b/crates/cli/src/bin/main.rs @@ -1,7 +1,7 @@ use anyhow::Result; use clap::{Parser, ValueEnum}; -use glaredb::args::LocalArgs; -use glaredb::commands::Commands; +use cli::args::LocalArgs; +use cli::commands::Commands; #[derive(Debug, Clone, Copy, ValueEnum, Default)] enum LoggingMode { diff --git a/crates/sqlexec/src/engine.rs b/crates/sqlexec/src/engine.rs index c8f273419..8f9387636 100644 --- a/crates/sqlexec/src/engine.rs +++ b/crates/sqlexec/src/engine.rs @@ -323,6 +323,15 @@ pub struct Engine { _task_executors: Vec, } +pub enum EngineBackend { + Memory, + Local(PathBuf), + Remote { + location: String, + options: HashMap, + }, +} + impl Engine { /// Create a new engine using the provided access runtime. 
pub async fn new( @@ -359,6 +368,16 @@ impl Engine { self.tracker.clone() } + pub async fn from_backend(opts: EngineBackend) -> Result { + match opts { + EngineBackend::Memory => Self::from_data_dir(None).await, + EngineBackend::Local(path) => Self::from_data_dir(Some(&path)).await, + EngineBackend::Remote { location, options } => { + Self::from_storage_options(&location, &options).await + } + } + } + /// Create a new `Engine` instance from the provided storage configuration with a in-process metastore pub async fn from_storage_options( location: &str, From 401d74efd00b5c689cd7ae8480fde85f4dd30c8c Mon Sep 17 00:00:00 2001 From: tycho garen Date: Thu, 21 Mar 2024 13:06:46 -0400 Subject: [PATCH 03/34] feat: high level glaredb rust api/sdk --- Cargo.lock | 68 +++++++++++++++++++++++++++++--------- bindings/python/Cargo.toml | 15 ++++----- crates/glaredb/Cargo.toml | 10 ++++++ crates/glaredb/src/lib.rs | 32 ++++++++++++++++++ 4 files changed, 102 insertions(+), 23 deletions(-) create mode 100644 crates/glaredb/Cargo.toml create mode 100644 crates/glaredb/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 5c0b2c19a..eefdd5335 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -549,7 +549,7 @@ dependencies = [ "backoff", "base64 0.21.7", "bytes", - "derive_builder", + "derive_builder 0.12.0", "futures", "rand", "reqwest", @@ -2111,12 +2111,12 @@ dependencies = [ [[package]] name = "darling" -version = "0.20.3" +version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e" +checksum = "54e36fcd13ed84ffdfda6f5be89b31287cbb80c439841fe69e04841435464391" dependencies = [ - "darling_core 0.20.3", - "darling_macro 0.20.3", + "darling_core 0.20.8", + "darling_macro 0.20.8", ] [[package]] @@ -2149,9 +2149,9 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.3" +version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621" +checksum = "9c2cf1c23a687a1feeb728783b993c4e1ad83d99f351801977dd809b48d0a70f" dependencies = [ "fnv", "ident_case", @@ -2185,11 +2185,11 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.20.3" +version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" +checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" dependencies = [ - "darling_core 0.20.3", + "darling_core 0.20.8", "quote", "syn 2.0.48", ] @@ -2740,7 +2740,16 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8" dependencies = [ - "derive_builder_macro", + "derive_builder_macro 0.12.0", +] + +[[package]] +name = "derive_builder" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7" +dependencies = [ + "derive_builder_macro 0.20.0", ] [[package]] @@ -2755,16 +2764,38 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "derive_builder_core" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d" +dependencies = [ + "darling 0.20.8", + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "derive_builder_macro" version = "0.12.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e" dependencies = [ - "derive_builder_core", + "derive_builder_core 0.12.0", "syn 1.0.109", ] +[[package]] +name = "derive_builder_macro" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" +dependencies = [ + "derive_builder_core 0.20.0", + "syn 2.0.48", +] + [[package]] name = "derive_more" version = "0.99.17" @@ -3517,6 +3548,13 @@ dependencies = [ "url", ] +[[package]] +name = "glaredb" +version = "0.9.2" +dependencies = [ + "derive_builder 0.20.0", +] + [[package]] name = "glob" version = "0.3.1" @@ -5071,7 +5109,7 @@ version = "0.30.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56b0d8a0db9bf6d2213e11f2c701cb91387b0614361625ab7b9743b41aa4938f" dependencies = [ - "darling 0.20.3", + "darling 0.20.8", "heck 0.4.1", "num-bigint", "proc-macro-crate 1.3.1", @@ -7288,7 +7326,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb6085ff9c3fd7e5163826901d39164ab86f11bdca16b2f766a00c528ff9cef9" dependencies = [ - "darling 0.20.3", + "darling 0.20.8", "proc-macro2", "quote", "syn 2.0.48", @@ -7531,7 +7569,7 @@ version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6561dc161a9224638a31d876ccdfefbc1df91d3f3a8342eddb35f055d48c7655" dependencies = [ - "darling 0.20.3", + "darling 0.20.8", "proc-macro2", "quote", "syn 2.0.48", diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 25405307b..2bbf4159d 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -14,12 +14,6 @@ crate-type = ["cdylib"] [dependencies] ioutil = { path = "../../crates/ioutil" } -tokio.workspace = true -datafusion = { workspace = true, features = ["pyarrow"] } -thiserror.workspace = true -url.workspace = true -anyhow = { workspace = true } -pyo3 = { version = "0.20.3", features = ["abi3-py37", "extension-module"] } sqlexec = { path = "../../crates/sqlexec" } metastore = { path = "../../crates/metastore" } telemetry = { path = "../../crates/telemetry" } @@ -27,11 +21,16 @@ pgsrv = { path = "../../crates/pgsrv" } pgrepr = { path = "../../crates/pgrepr" } datafusion_ext = { path = "../../crates/datafusion_ext" } arrow_util = { path = "../../crates/arrow_util" } -# glaredb = { path = "../../crates/glaredb" } terminal_util = { path = "../../crates/terminal_util" } +datafusion = { workspace = true, features = ["pyarrow"] } +tokio = { workspace = true } +thiserror = { workspace = true } +url = { workspace = true } +anyhow = { workspace = true } futures = { workspace = true } -uuid = "1.7.0" async-trait = { workspace = true } +pyo3 = { version = "0.20.3", features = ["abi3-py37", "extension-module"] } +uuid = "1.7.0" once_cell = "1.19.0" # Prevent dynamic linking of lzma, which comes from datafusion diff --git a/crates/glaredb/Cargo.toml b/crates/glaredb/Cargo.toml new file mode 100644 index 000000000..4731641d6 --- /dev/null +++ b/crates/glaredb/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "glaredb" +version.workspace = true +edition.workspace = true + +[lints] +workspace = true + +[dependencies] +derive_builder = "0.20.0" diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs new file mode 100644 index 000000000..1a2426689 --- /dev/null +++ b/crates/glaredb/src/lib.rs @@ -0,0 +1,32 @@ +use 
std::collections::HashMap; + +use derive_builder::Builder; + +#[derive(Default, Builder)] +pub struct ConnectOptions { + #[builder(setter(into, strip_option))] + pub location: Option, + #[builder(setter(into, strip_option))] + pub spill_path: Option, + pub disable_tls: Option, + #[builder(setter(strip_option))] + pub storage_options: HashMap, + #[builder(default = "Some(\"https://console.glaredb.com\".to_string())")] + #[builder(setter(into, strip_option))] + pub cloud_addr: Option, +} + +impl ConnectOptionsBuilder { + pub fn set_storage_option( + &mut self, + key: impl Into, + value: impl Into, + ) -> &mut Self { + let mut opts = match self.storage_options.to_owned() { + Some(opts) => opts, + None => HashMap::new(), + }; + opts.insert(key.into(), value.into()); + self.storage_options(opts) + } +} From 379f300ef27cf6ddd4ff3b05b4b407b88ad397bb Mon Sep 17 00:00:00 2001 From: tycho garen Date: Wed, 27 Mar 2024 17:04:54 -0400 Subject: [PATCH 04/34] chore: refactor connection, create driver connect --- Cargo.lock | 2 + bindings/nodejs/src/connection.rs | 65 ++++++++---------------- bindings/python/src/connect.rs | 56 +++++++-------------- bindings/python/src/connection.rs | 14 +++--- bindings/python/src/error.rs | 12 ++--- crates/cli/src/local.rs | 2 +- crates/glaredb/Cargo.toml | 2 + crates/glaredb/src/lib.rs | 77 ++++++++++++++++++++++++++++- crates/sqlexec/Cargo.toml | 15 +++--- crates/sqlexec/src/engine.rs | 30 +++++++++-- crates/sqlexec/src/remote/client.rs | 2 +- crates/sqlexec/src/session.rs | 26 +++++++++- 12 files changed, 193 insertions(+), 110 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eefdd5335..3de569e8a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3553,6 +3553,8 @@ name = "glaredb" version = "0.9.2" dependencies = [ "derive_builder 0.20.0", + "sqlexec", + "url", ] [[package]] diff --git a/bindings/nodejs/src/connection.rs b/bindings/nodejs/src/connection.rs index b11294bf9..23fbd80b5 100644 --- a/bindings/nodejs/src/connection.rs +++ b/bindings/nodejs/src/connection.rs @@ -20,7 +20,7 @@ pub(super) type JsTrackedSession = Arc>; #[napi] #[derive(Clone)] pub struct Connection { - pub(crate) sess: JsTrackedSession, + pub(crate) session: JsTrackedSession, pub(crate) _engine: Arc, } @@ -81,55 +81,32 @@ impl Connection { .await .map_err(JsGlareDbError::from)?; - // If spill path not provided, default to some tmp dir. - let spill_path = match spill_path { - Some(p) => { - let path = PathBuf::from(p); - ensure_dir(&path)?; - Some(path) - } - None => { - let path = std::env::temp_dir().join("glaredb-js"); - // if user doesn't have permission to write to temp dir, then - // just don't use a spill path. 
- ensure_dir(&path).ok().map(|_| path) - } - }; - engine = engine.with_spill_path(spill_path); + engine = engine + .with_spill_path(spill_path.map(|p| p.into())) + .map_err(JsGlareDbError::from)?; + + let mut session = engine + .default_local_session_context() + .await + .map_err(JsGlareDbError::from)?; - let session = if let Some(url) = conf.cloud_url.clone() { - let exec_client = RemoteClient::connect_with_proxy_destination( - url.try_into().map_err(JsGlareDbError::from)?, + session + .create_client_session( + conf.cloud_url.clone(), cloud_addr, disable_tls, RemoteClientType::Node, + None, ) .await .map_err(JsGlareDbError::from)?; - let mut sess = engine - .new_local_session_context(SessionVars::default(), SessionStorageConfig::default()) - .await - .map_err(JsGlareDbError::from)?; - sess.attach_remote_session(exec_client.clone(), None) - .await - .map_err(JsGlareDbError::from)?; - - sess - } else { - engine - .new_local_session_context(SessionVars::default(), SessionStorageConfig::default()) - .await - .map_err(JsGlareDbError::from)? - }; - - let sess = Arc::new(Mutex::new(session)); - Ok(Connection { - sess, + session: Arc::new(Mutex::new(session)), _engine: Arc::new(engine), }) } + /// Returns a default connection to an in-memory database. /// /// The database is only initialized once, and all subsequent calls will @@ -144,7 +121,7 @@ impl Connection { .await .map_err(JsGlareDbError::from)?; let con = Connection { - sess: Arc::new(Mutex::new(sess)), + session: Arc::new(Mutex::new(sess)), _engine: Arc::new(engine), }; @@ -190,8 +167,8 @@ impl Connection { /// ``` #[napi(catch_unwind)] pub async fn sql(&self, query: String) -> napi::Result { - let cloned_sess = self.sess.clone(); - let mut sess = self.sess.lock().await; + let cloned_sess = self.session.clone(); + let mut sess = self.session.lock().await; let plan = sess .create_logical_plan(&query) @@ -238,8 +215,8 @@ impl Connection { /// processed. #[napi(catch_unwind)] pub async fn prql(&self, query: String) -> napi::Result { - let cloned_sess = self.sess.clone(); - let mut sess = self.sess.lock().await; + let cloned_sess = self.session.clone(); + let mut sess = self.session.lock().await; let plan = sess .prql_to_lp(&query) .await @@ -264,7 +241,7 @@ impl Connection { /// ``` #[napi(catch_unwind)] pub async fn execute(&self, query: String) -> napi::Result<()> { - let sess = self.sess.clone(); + let sess = self.session.clone(); let mut sess = sess.lock().await; let plan = sess diff --git a/bindings/python/src/connect.rs b/bindings/python/src/connect.rs index d8d2fe606..e29f717f3 100644 --- a/bindings/python/src/connect.rs +++ b/bindings/python/src/connect.rs @@ -9,10 +9,9 @@ use std::sync::Arc; use datafusion_ext::vars::SessionVars; use futures::lock::Mutex; -use ioutil::ensure_dir; use pyo3::prelude::*; use sqlexec::engine::{Engine, EngineBackend, SessionStorageConfig}; -use sqlexec::remote::client::{RemoteClient, RemoteClientType}; +use sqlexec::remote::client::RemoteClientType; use url::Url; use crate::connection::Connection; @@ -102,55 +101,34 @@ pub fn connect( EngineBackend::Memory }; - let mut engine = Engine::from_backend(backend).await?; + let mut engine = Engine::from_backend(backend) + .await + .map_err(PyGlareDbError::from)?; - // If spill path not provided, default to some tmp dir. 
- let spill_path = match spill_path { - Some(p) => { - let path = PathBuf::from(p); - ensure_dir(&path)?; - Some(path) - } - None => { - let path = std::env::temp_dir().join("glaredb-python"); - // if user doesn't have permission to write to temp dir, then - // just don't use a spill path. - ensure_dir(&path).ok().map(|_| path) - } - }; - engine = engine.with_spill_path(spill_path); + engine = engine + .with_spill_path(spill_path.map(|p| p.into())) + .map_err(PyGlareDbError::from)?; - let mut session = if let Some(url) = conf.cloud_url.clone() { - let exec_client = RemoteClient::connect_with_proxy_destination( - url.try_into().map_err(PyGlareDbError::from)?, + let mut session = engine + .default_local_session_context() + .await + .map_err(PyGlareDbError::from)?; + + session + .create_client_session( + conf.cloud_url.clone(), cloud_addr, disable_tls, RemoteClientType::Python, + None, ) .await .map_err(PyGlareDbError::from)?; - let mut sess = engine - .new_local_session_context(SessionVars::default(), SessionStorageConfig::default()) - .await - .map_err(PyGlareDbError::from)?; - sess.attach_remote_session(exec_client.clone(), None) - .await - .map_err(PyGlareDbError::from)?; - - sess - } else { - engine - .new_local_session_context(SessionVars::default(), SessionStorageConfig::default()) - .await - .map_err(PyGlareDbError::from)? - }; - session.register_env_reader(Box::new(PyEnvironmentReader)); - let sess = Arc::new(Mutex::new(session)); Ok(Connection { - sess, + session: Arc::new(Mutex::new(session)), _engine: Arc::new(engine), }) }) diff --git a/bindings/python/src/connection.rs b/bindings/python/src/connection.rs index e16a13648..28a3ef727 100644 --- a/bindings/python/src/connection.rs +++ b/bindings/python/src/connection.rs @@ -21,7 +21,7 @@ use crate::runtime::wait_for_future; #[pyclass] #[derive(Clone)] pub struct Connection { - pub(super) sess: PyTrackedSession, + pub(super) session: PyTrackedSession, pub(super) _engine: Arc, } @@ -43,7 +43,7 @@ impl Connection { ) .await?; Ok(Connection { - sess: Arc::new(Mutex::new(sess)), + session: Arc::new(Mutex::new(sess)), _engine: Arc::new(engine), }) as Result<_, PyGlareDbError> }) @@ -110,9 +110,9 @@ impl Connection { /// con.sql('create table my_table (a int)').execute() /// ``` pub fn sql(&mut self, py: Python<'_>, query: &str) -> PyResult { - let cloned_sess = self.sess.clone(); + let cloned_sess = self.session.clone(); wait_for_future(py, async move { - let mut sess = self.sess.lock().await; + let mut sess = self.session.lock().await; let plan = sess .create_logical_plan(query) @@ -159,9 +159,9 @@ impl Connection { /// All operations execute lazily when their results are /// processed. 
pub fn prql(&mut self, py: Python<'_>, query: &str) -> PyResult { - let cloned_sess = self.sess.clone(); + let cloned_sess = self.session.clone(); wait_for_future(py, async move { - let mut sess = self.sess.lock().await; + let mut sess = self.session.lock().await; let plan = sess.prql_to_lp(query).await.map_err(PyGlareDbError::from)?; let op = OperationInfo::new().with_query_text(query); @@ -182,7 +182,7 @@ impl Connection { /// con.execute('create table my_table (a int)') /// ``` pub fn execute(&mut self, py: Python<'_>, query: &str) -> PyResult { - let sess = self.sess.clone(); + let sess = self.session.clone(); let (_, exec_result) = wait_for_future(py, async move { let mut sess = sess.lock().await; let plan = sess diff --git a/bindings/python/src/error.rs b/bindings/python/src/error.rs index 611d188b9..4a31f83f6 100644 --- a/bindings/python/src/error.rs +++ b/bindings/python/src/error.rs @@ -25,15 +25,15 @@ impl PyGlareDbError { Self::Other(msg.to_string()) } } + impl From for PyErr { fn from(err: PyGlareDbError) -> Self { - use PyGlareDbError::*; match err { - Arrow(err) => ArrowErrorException::new_err(format!("{err:?}")), - Metastore(err) => MetastoreException::new_err(err.to_string()), - Exec(err) => ExecutionException::new_err(err.to_string()), - Anyhow(err) => PyRuntimeError::new_err(format!("{err:?}")), - Other(msg) => PyRuntimeError::new_err(msg), + PyGlareDbError::Arrow(err) => ArrowErrorException::new_err(format!("{err:?}")), + PyGlareDbError::Metastore(err) => MetastoreException::new_err(err.to_string()), + PyGlareDbError::Exec(err) => ExecutionException::new_err(err.to_string()), + PyGlareDbError::Anyhow(err) => PyRuntimeError::new_err(format!("{err:?}")), + PyGlareDbError::Other(msg) => PyRuntimeError::new_err(msg), } } } diff --git a/crates/cli/src/local.rs b/crates/cli/src/local.rs index 2bb837985..36fb85996 100644 --- a/crates/cli/src/local.rs +++ b/crates/cli/src/local.rs @@ -60,7 +60,7 @@ impl LocalSession { Engine::from_data_dir(opts.data_dir.as_ref()).await? 
}; - engine = engine.with_spill_path(opts.spill_path.clone()); + engine = engine.with_spill_path(opts.spill_path.clone())?; let sess = if let Some(url) = opts.cloud_url.clone() { let (exec_client, info_msg) = if opts.ignore_rpc_auth { diff --git a/crates/glaredb/Cargo.toml b/crates/glaredb/Cargo.toml index 4731641d6..96aa42ecc 100644 --- a/crates/glaredb/Cargo.toml +++ b/crates/glaredb/Cargo.toml @@ -7,4 +7,6 @@ edition.workspace = true workspace = true [dependencies] +url = { workspace = true } +sqlexec = { path = "../sqlexec" } derive_builder = "0.20.0" diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 1a2426689..397e4e652 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -1,19 +1,88 @@ use std::collections::HashMap; +use std::path::PathBuf; +use std::sync::{Arc, Mutex}; use derive_builder::Builder; +use sqlexec::engine::{Engine, EngineBackend, TrackedSession}; +use sqlexec::errors::ExecError; +use sqlexec::remote::client::RemoteClientType; +use url::Url; + #[derive(Default, Builder)] pub struct ConnectOptions { + #[builder(setter(into, strip_option))] + pub connection_target: Option, #[builder(setter(into, strip_option))] pub location: Option, #[builder(setter(into, strip_option))] pub spill_path: Option, - pub disable_tls: Option, #[builder(setter(strip_option))] pub storage_options: HashMap, + + #[builder] + pub disable_tls: Option, #[builder(default = "Some(\"https://console.glaredb.com\".to_string())")] #[builder(setter(into, strip_option))] pub cloud_addr: Option, + #[builder(default = "Some(RemoteClientType::Cli)")] + #[builder(setter(strip_option))] + pub client_type: Option, +} + +impl ConnectOptions { + pub fn backend(&self) -> EngineBackend { + if let Some(location) = self.location.clone() { + EngineBackend::Remote { + location, + options: self.storage_options.clone(), + } + } else if let Some(data_dir) = self.data_dir() { + EngineBackend::Local(data_dir) + } else { + EngineBackend::Memory + } + } + + fn data_dir(&self) -> Option { + match self.connection_target.clone() { + Some(s) => match Url::parse(&s) { + Ok(_) => None, + Err(_) => Some(PathBuf::from(s)), + }, + None => None, + } + } + + fn cloud_url(&self) -> Option { + self.connection_target + .clone() + .and_then(|v| Url::parse(&v).ok()) + } + + + pub async fn connect(&self) -> Result { + let mut engine = Engine::from_backend(self.backend()).await?; + + engine = engine.with_spill_path(self.spill_path.clone().map(|p| p.into()))?; + + let mut session = engine.default_local_session_context().await?; + + session + .create_client_session( + self.cloud_url(), + self.cloud_addr.clone().unwrap_or_default(), + self.disable_tls.unwrap_or_default(), + self.client_type.clone().unwrap(), + None, + ) + .await?; + + Ok(Connection { + _session: Arc::new(Mutex::new(session)), + _engine: Arc::new(engine), + }) + } } impl ConnectOptionsBuilder { @@ -30,3 +99,9 @@ impl ConnectOptionsBuilder { self.storage_options(opts) } } + + +pub struct Connection { + _session: Arc>, + _engine: Arc, +} diff --git a/crates/sqlexec/Cargo.toml b/crates/sqlexec/Cargo.toml index 64a7d394a..49edfa2d4 100644 --- a/crates/sqlexec/Cargo.toml +++ b/crates/sqlexec/Cargo.toml @@ -23,27 +23,28 @@ parser = { path = "../parser" } distexec = { path = "../distexec" } dashmap = "5.5.0" metastore = { path = "../metastore" } -thiserror.workspace = true +thiserror = { workspace = true } +tempfile = { workspace = true } tokio = { workspace = true } async-trait = { workspace = true } datafusion = { workspace = true } prost = { 
workspace = true } datafusion-proto = { workspace = true } -bytes = "1.4.0" futures = { workspace = true } tracing = { workspace = true } object_store = { workspace = true } -uuid = { version = "1.7.0", features = ["v4", "fast-rng", "macro-diagnostics"] } tonic = { workspace = true } +serde = { workspace = true } +reqwest = { workspace = true } +url = { workspace = true } +bytes = "1.4.0" +uuid = { version = "1.7.0", features = ["v4", "fast-rng", "macro-diagnostics"] } tokio-postgres = "0.7.8" once_cell = "1.19.0" -url.workspace = true parking_lot = "0.12.1" -serde = { workspace = true } -reqwest = { workspace = true } num_cpus = "1.16.0" [dev-dependencies] +metastore = { path = "../metastore" } tempfile = "3" tower = "0.4" -metastore = { path = "../metastore" } diff --git a/crates/sqlexec/src/engine.rs b/crates/sqlexec/src/engine.rs index 8f9387636..74c590594 100644 --- a/crates/sqlexec/src/engine.rs +++ b/crates/sqlexec/src/engine.rs @@ -27,6 +27,7 @@ use protogen::gen::metastore::service::metastore_service_client::MetastoreServic use protogen::rpcsrv::types::common; use sqlbuiltins::builtins::{SCHEMA_CURRENT_SESSION, SCHEMA_DEFAULT}; use telemetry::Tracker; +use tempfile; use tonic::transport::Channel; use tracing::{debug, info}; use url::Url; @@ -321,6 +322,7 @@ pub struct Engine { task_scheduler: Scheduler, /// Task executors. _task_executors: Vec, + _tmp_dir: Option, } pub enum EngineBackend { @@ -360,6 +362,7 @@ impl Engine { session_counter: Arc::new(AtomicU64::new(0)), task_scheduler, _task_executors: task_executors, + _tmp_dir: None, }) } @@ -419,9 +422,24 @@ impl Engine { self } - pub fn with_spill_path(mut self, spill_path: Option) -> Engine { - self.spill_path = spill_path; - self + pub fn with_spill_path(mut self, spill_path: Option) -> Result { + self.spill_path = match spill_path { + Some(path) => { + ensure_dir(&path)?; + Some(path) + } + None => { + self._tmp_dir = Some( + tempfile::Builder::new() + .prefix("glaredb") + .rand_bytes(8) + .tempdir()?, + ); + Some(self._tmp_dir.as_ref().unwrap().path().to_path_buf()) + } + }; + + Ok(self) } /// Get the current number of sessions. @@ -450,6 +468,12 @@ impl Engine { }) } + pub async fn default_local_session_context(&self) -> Result { + Ok(self + .new_local_session_context(SessionVars::default(), SessionStorageConfig::default()) + .await?) + } + /// Create a new untracked session. /// /// This does not increment the session counter. 
diff --git a/crates/sqlexec/src/remote/client.rs b/crates/sqlexec/src/remote/client.rs index 5abd818be..e42b5ffca 100644 --- a/crates/sqlexec/src/remote/client.rs +++ b/crates/sqlexec/src/remote/client.rs @@ -122,7 +122,7 @@ pub struct AuthenticateClientError { pub msg: String, } -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum RemoteClientType { Cli, Node, diff --git a/crates/sqlexec/src/session.rs b/crates/sqlexec/src/session.rs index 2550605f4..e364a9a08 100644 --- a/crates/sqlexec/src/session.rs +++ b/crates/sqlexec/src/session.rs @@ -38,6 +38,7 @@ use pgrepr::format::Format; use pgrepr::notice::{Notice, NoticeSeverity, SqlState}; use sqlbuiltins::functions::BuiltinScalarUDF; use telemetry::Tracker; +use url::Url; use uuid::Uuid; use crate::context::local::{LocalSessionContext, Portal, PreparedStatement}; @@ -51,7 +52,7 @@ use crate::planner::physical_plan::{ GENERIC_OPERATION_PHYSICAL_SCHEMA, }; use crate::planner::session_planner::SessionPlanner; -use crate::remote::client::RemoteClient; +use crate::remote::client::{RemoteClient, RemoteClientType}; use crate::remote::planner::{DDLExtensionPlanner, RemotePhysicalPlanner}; static EMPTY_EXEC_PLAN: Lazy> = @@ -430,6 +431,29 @@ impl Session { self.ctx.register_function(udf).await } + pub async fn create_client_session( + &mut self, + cloud_url: Option, + cloud_addr: String, + disable_tls: bool, + client_type: RemoteClientType, + test_db_id: Option, + ) -> Result<()> { + let client = match cloud_url { + Some(url) => { + RemoteClient::connect_with_proxy_destination( + url.try_into()?, + cloud_addr, + disable_tls, + client_type, + ) + .await? + } + None => return Ok(()), + }; + self.attach_remote_session(client, test_db_id).await + } + pub async fn attach_remote_session( &mut self, client: RemoteClient, From daa757d61ffad26c03c22f0c33e3d395e3031e89 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Thu, 28 Mar 2024 12:26:18 -0400 Subject: [PATCH 05/34] wip --- crates/glaredb/src/lib.rs | 66 ++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 397e4e652..acba2f39f 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -6,6 +6,8 @@ use derive_builder::Builder; use sqlexec::engine::{Engine, EngineBackend, TrackedSession}; use sqlexec::errors::ExecError; use sqlexec::remote::client::RemoteClientType; +use sqlexec::session::ExecutionResult; +use sqlexec::OperationInfo; use url::Url; @@ -31,7 +33,30 @@ pub struct ConnectOptions { } impl ConnectOptions { - pub fn backend(&self) -> EngineBackend { + pub async fn connect(&self) -> Result { + let mut engine = Engine::from_backend(self.backend()).await?; + + engine = engine.with_spill_path(self.spill_path.clone().map(|p| p.into()))?; + + let mut session = engine.default_local_session_context().await?; + + session + .create_client_session( + self.cloud_url(), + self.cloud_addr.clone().unwrap_or_default(), + self.disable_tls.unwrap_or_default(), + self.client_type.clone().unwrap(), + None, + ) + .await?; + + Ok(Connection { + _session: Arc::new(Mutex::new(session)), + _engine: Arc::new(engine), + }) + } + + fn backend(&self) -> EngineBackend { if let Some(location) = self.location.clone() { EngineBackend::Remote { location, @@ -59,30 +84,6 @@ impl ConnectOptions { .clone() .and_then(|v| Url::parse(&v).ok()) } - - - pub async fn connect(&self) -> Result { - let mut engine = Engine::from_backend(self.backend()).await?; - - engine = 
engine.with_spill_path(self.spill_path.clone().map(|p| p.into()))?; - - let mut session = engine.default_local_session_context().await?; - - session - .create_client_session( - self.cloud_url(), - self.cloud_addr.clone().unwrap_or_default(), - self.disable_tls.unwrap_or_default(), - self.client_type.clone().unwrap(), - None, - ) - .await?; - - Ok(Connection { - _session: Arc::new(Mutex::new(session)), - _engine: Arc::new(engine), - }) - } } impl ConnectOptionsBuilder { @@ -105,3 +106,18 @@ pub struct Connection { _session: Arc>, _engine: Arc, } + +// TODO (create blocking and non-blocking variants) +impl Connection { + pub async fn sql(&mut self, query: &str) -> Result { + let mut ses = self._session.lock().unwrap(); + + let plan = ses.create_logical_plan(query).await?; + let op = OperationInfo::new().with_query_text(query); + + let (ep, execres) = ses.execute_logical_plan(plan, &op).await?; + + + Ok(stream) + } +} From ae73d453e42d9742dea11accef052d7fe00956bd Mon Sep 17 00:00:00 2001 From: tycho garen Date: Thu, 28 Mar 2024 12:31:17 -0400 Subject: [PATCH 06/34] fix: merge related build failure --- crates/cli/src/bin/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/cli/src/bin/main.rs b/crates/cli/src/bin/main.rs index 8199d20b3..5a90840b9 100644 --- a/crates/cli/src/bin/main.rs +++ b/crates/cli/src/bin/main.rs @@ -1,7 +1,7 @@ use anyhow::Result; use clap::{Parser, ValueEnum}; -use glaredb::args::LocalArgs; -use glaredb::commands::Commands; +use cli::args::LocalArgs; +use cli::commands::Commands; #[derive(Debug, Clone, Copy, ValueEnum, Default)] enum LoggingMode { From 115d5ef6e01863f287a7588400aa4021ae711573 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Thu, 28 Mar 2024 18:13:31 -0400 Subject: [PATCH 07/34] feat: implementation of the connection methods --- Cargo.lock | 2 + crates/datasources/src/common/util.rs | 1 + crates/glaredb/Cargo.toml | 4 +- crates/glaredb/src/lib.rs | 169 ++++++++++++++++++++++---- crates/sqlexec/src/errors.rs | 12 ++ 5 files changed, 162 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e46d865fc..ebd6536a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3552,7 +3552,9 @@ dependencies = [ name = "glaredb" version = "0.9.2" dependencies = [ + "datafusion", "derive_builder 0.20.0", + "futures", "sqlexec", "url", ] diff --git a/crates/datasources/src/common/util.rs b/crates/datasources/src/common/util.rs index 4d66b09c8..48f90c361 100644 --- a/crates/datasources/src/common/util.rs +++ b/crates/datasources/src/common/util.rs @@ -205,6 +205,7 @@ pub fn create_count_record_batch(count: u64) -> RecordBatch { .unwrap() } + #[cfg(test)] mod tests { use datafusion::arrow::array::{ diff --git a/crates/glaredb/Cargo.toml b/crates/glaredb/Cargo.toml index 96aa42ecc..d7c9dc23c 100644 --- a/crates/glaredb/Cargo.toml +++ b/crates/glaredb/Cargo.toml @@ -7,6 +7,8 @@ edition.workspace = true workspace = true [dependencies] -url = { workspace = true } sqlexec = { path = "../sqlexec" } +url = { workspace = true } +datafusion = { workspace = true } +futures = { workspace = true } derive_builder = "0.20.0" diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index acba2f39f..f7cd6a047 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -1,8 +1,19 @@ use std::collections::HashMap; use std::path::PathBuf; -use std::sync::{Arc, Mutex}; - +use std::sync::Arc; + +use datafusion::arrow::array::{StringArray, UInt64Array}; +use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use 
datafusion::arrow::record_batch::RecordBatch; +use datafusion::error::DataFusionError; +use datafusion::logical_expr::LogicalPlan; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +// public re-export so downstream users of this package don't have to +// directly depend on DF (and our version no-less) to use our interfaces. +pub use datafusion::physical_plan::SendableRecordBatchStream; use derive_builder::Builder; +use futures::lock::Mutex; +use futures::stream::StreamExt; use sqlexec::engine::{Engine, EngineBackend, TrackedSession}; use sqlexec::errors::ExecError; use sqlexec::remote::client::RemoteClientType; @@ -32,6 +43,21 @@ pub struct ConnectOptions { pub client_type: Option, } +impl ConnectOptionsBuilder { + pub fn set_storage_option( + &mut self, + key: impl Into, + value: impl Into, + ) -> &mut Self { + let mut opts = match self.storage_options.to_owned() { + Some(opts) => opts, + None => HashMap::new(), + }; + opts.insert(key.into(), value.into()); + self.storage_options(opts) + } +} + impl ConnectOptions { pub async fn connect(&self) -> Result { let mut engine = Engine::from_backend(self.backend()).await?; @@ -51,7 +77,7 @@ impl ConnectOptions { .await?; Ok(Connection { - _session: Arc::new(Mutex::new(session)), + session: Arc::new(Mutex::new(session)), _engine: Arc::new(engine), }) } @@ -86,38 +112,131 @@ impl ConnectOptions { } } -impl ConnectOptionsBuilder { - pub fn set_storage_option( - &mut self, - key: impl Into, - value: impl Into, - ) -> &mut Self { - let mut opts = match self.storage_options.to_owned() { - Some(opts) => opts, - None => HashMap::new(), - }; - opts.insert(key.into(), value.into()); - self.storage_options(opts) - } -} - - pub struct Connection { - _session: Arc>, + session: Arc>, _engine: Arc, } -// TODO (create blocking and non-blocking variants) impl Connection { - pub async fn sql(&mut self, query: &str) -> Result { - let mut ses = self._session.lock().unwrap(); + // TODO: + // - decide if we want to actually return the DF Sendable types + // (putting the schema in the wrapper is annoying and it + // doesn't get us much, but a sendable) stream is nice. + // - do we want to have sync methods that return + // Stream> (e.g. make it fully lazy + // and flatten errors) or Iterator>? + // Both of these seem useful in some (many?) cases. + // - prql helper. + // - regardless, wrapping/aliasing SRBS values in some way so + // that we can provide helper methods (exhaust, converters(?)) + // might be good. + pub async fn execute(&self, query: &str) -> Result { + let mut ses = self.session.lock().await; + let plan = ses.create_logical_plan(query).await?; + let op = OperationInfo::new().with_query_text(query); + + Ok(Self::process_result( + ses.execute_logical_plan(plan, &op).await?.1, + )) + } + pub async fn query(&self, query: &str) -> Result { + let mut ses = self.session.lock().await; let plan = ses.create_logical_plan(query).await?; let op = OperationInfo::new().with_query_text(query); - let (ep, execres) = ses.execute_logical_plan(plan, &op).await?; + match plan.to_owned().try_into_datafusion_plan()? 
{ + LogicalPlan::Dml(_) + | LogicalPlan::Ddl(_) + | LogicalPlan::Copy(_) + | LogicalPlan::Extension(_) => Ok(Self::process_result( + ses.execute_logical_plan(plan, &op).await?.1, + )), + _ => { + let ses_clone = self.session.clone(); + + Ok(Self::process_result(ExecutionResult::Query { + stream: Box::pin(RecordBatchStreamAdapter::new( + Arc::new(plan.output_schema().unwrap_or_else(|| Schema::empty())), + futures::stream::once(async move { + let mut ses = ses_clone.lock().await; + match ses.execute_logical_plan(plan, &op).await { + Ok((_, res)) => Self::process_result(res), + Err(e) => Self::handle_error(e), + } + }) + .flatten(), + )), + })) + } + } + } + + fn handle_error(err: impl Into) -> SendableRecordBatchStream { + Self::process_result(ExecutionResult::Error(err.into())) + } + + fn process_result(res: ExecutionResult) -> SendableRecordBatchStream { + match res { + ExecutionResult::Query { stream } => stream, + ExecutionResult::Error(e) => Box::pin(RecordBatchStreamAdapter::new( + Arc::new(Schema::empty()), + futures::stream::once(async move { Err(e) }), + )), + ExecutionResult::InsertSuccess { rows_inserted } => { + Self::numeric_result("count", rows_inserted as u64) + } + ExecutionResult::DeleteSuccess { deleted_rows } => { + Self::numeric_result("count", deleted_rows as u64) + } + ExecutionResult::UpdateSuccess { updated_rows } => { + Self::numeric_result("count", updated_rows as u64) + } + _ => Self::operation_result("result", res.to_string()), + } + } + fn numeric_result(field_name: impl Into, num: u64) -> SendableRecordBatchStream { + let schema = Arc::new(Schema::new(vec![Field::new( + field_name, + DataType::UInt64, + false, + )])); + + Box::pin(RecordBatchStreamAdapter::new( + schema.clone(), + futures::stream::once(async move { + RecordBatch::try_new( + schema.clone(), + vec![Arc::new(UInt64Array::from_value(num, 1))], + ) + .map_err(DataFusionError::from) + }), + )) + } - Ok(stream) + fn operation_result( + field_name: impl Into, + op: impl Into, + ) -> SendableRecordBatchStream { + let schema = Arc::new(Schema::new(vec![Field::new( + field_name, + DataType::Utf8, + false, + )])); + let op = op.into(); + + Box::pin(RecordBatchStreamAdapter::new( + schema.clone(), + futures::stream::once(async move { + RecordBatch::try_new( + schema.clone(), + vec![Arc::new(StringArray::from_iter_values( + vec![op].into_iter(), + ))], + ) + .map_err(DataFusionError::from) + }), + )) } } diff --git a/crates/sqlexec/src/errors.rs b/crates/sqlexec/src/errors.rs index b815e1f51..27fe7866b 100644 --- a/crates/sqlexec/src/errors.rs +++ b/crates/sqlexec/src/errors.rs @@ -167,6 +167,18 @@ pub enum ExecError { Metastore(#[from] metastore::errors::MetastoreError), } +impl From for datafusion::error::DataFusionError { + fn from(e: ExecError) -> Self { + match e { + ExecError::DataFusion(e) => e, + ExecError::Arrow(e) => datafusion::error::DataFusionError::ArrowError(e, None), + ExecError::Io(e) => datafusion::error::DataFusionError::IoError(e), + _ => datafusion::error::DataFusionError::External(Box::new(e)), + } + } +} + + pub type Result = std::result::Result; #[allow(unused_macros)] From 7e14f4da5f0ecff091f151a4c15d0ca7dcdf78fa Mon Sep 17 00:00:00 2001 From: tycho garen Date: Thu, 28 Mar 2024 19:34:51 -0400 Subject: [PATCH 08/34] fix: unittest build --- crates/cli/tests/drop_tables_test.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/cli/tests/drop_tables_test.rs b/crates/cli/tests/drop_tables_test.rs index 1d2cc419e..2f08fd92f 100644 --- 
a/crates/cli/tests/drop_tables_test.rs +++ b/crates/cli/tests/drop_tables_test.rs @@ -1,6 +1,6 @@ -use glaredb::args::{LocalClientOpts, StorageConfigArgs}; -use glaredb::local::LocalSession; -use glaredb::server::ComputeServer; +use cli::args::{LocalClientOpts, StorageConfigArgs}; +use cli::local::LocalSession; +use cli::server::ComputeServer; use tokio::net::TcpListener; #[tokio::test] From e150dc7188caccd20ee1206b362a991d06c20761 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Thu, 28 Mar 2024 19:44:04 -0400 Subject: [PATCH 09/34] fix: explicit reference --- crates/cli/tests/drop_tables_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/cli/tests/drop_tables_test.rs b/crates/cli/tests/drop_tables_test.rs index 2f08fd92f..091fa5e28 100644 --- a/crates/cli/tests/drop_tables_test.rs +++ b/crates/cli/tests/drop_tables_test.rs @@ -30,7 +30,7 @@ async fn test_drop_tables_removes_files() { }, timing: false, ignore_rpc_auth: true, - mode: glaredb::args::OutputMode::Table, + mode: cli::args::OutputMode::Table, max_width: None, max_rows: None, disable_tls: true, From c1012218783682bb6c791ae1d16a38e2a620c97f Mon Sep 17 00:00:00 2001 From: tycho garen Date: Thu, 28 Mar 2024 20:28:46 -0400 Subject: [PATCH 10/34] fix: binary resolution --- crates/cli/tests/setup.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/cli/tests/setup.rs b/crates/cli/tests/setup.rs index 2cda08f4e..4495c0900 100644 --- a/crates/cli/tests/setup.rs +++ b/crates/cli/tests/setup.rs @@ -3,7 +3,7 @@ use std::time::Duration; use assert_cmd::cmd::Command; pub fn make_cli() -> Command { - Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Failed to find binary") + Command::cargo_bin("glaredb").expect("Failed to find binary") } #[allow(dead_code)] // Used in the tests. IDK why clippy is complaining about it. From 592bda2f2cc0f67513e57f2db65c9366db2e7c60 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Thu, 28 Mar 2024 22:35:31 -0400 Subject: [PATCH 11/34] fix: python binding error handling --- bindings/python/src/connect.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bindings/python/src/connect.rs b/bindings/python/src/connect.rs index d8d2fe606..c928880c9 100644 --- a/bindings/python/src/connect.rs +++ b/bindings/python/src/connect.rs @@ -102,7 +102,9 @@ pub fn connect( EngineBackend::Memory }; - let mut engine = Engine::from_backend(backend).await?; + let mut engine = Engine::from_backend(backend) + .await + .map_err(PyGlareDbError::from)?; // If spill path not provided, default to some tmp dir. 
let spill_path = match spill_path { From 90f783d55c4c692b888951eaa834dac4d4385898 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 29 Mar 2024 12:32:24 -0400 Subject: [PATCH 12/34] fix: lint error --- crates/glaredb/src/lib.rs | 8 ++++++++ crates/sqlexec/src/engine.rs | 5 ++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index f7cd6a047..7363a7e27 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -59,6 +59,14 @@ impl ConnectOptionsBuilder { } impl ConnectOptions { + pub fn new_in_memory() -> Self { + Self { + location: None, + connection_target: None, + ..Default::default() + } + } + pub async fn connect(&self) -> Result { let mut engine = Engine::from_backend(self.backend()).await?; diff --git a/crates/sqlexec/src/engine.rs b/crates/sqlexec/src/engine.rs index 74c590594..11a1d0b36 100644 --- a/crates/sqlexec/src/engine.rs +++ b/crates/sqlexec/src/engine.rs @@ -469,9 +469,8 @@ impl Engine { } pub async fn default_local_session_context(&self) -> Result { - Ok(self - .new_local_session_context(SessionVars::default(), SessionStorageConfig::default()) - .await?) + self.new_local_session_context(SessionVars::default(), SessionStorageConfig::default()) + .await } /// Create a new untracked session. From 9486f21ef23260ec1005c6c3c511db88731e08c5 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 29 Mar 2024 14:30:43 -0400 Subject: [PATCH 13/34] feat: prql helpers and sync stream producers --- bindings/nodejs/src/connection.rs | 2 +- crates/glaredb/src/lib.rs | 131 +++++++++++++++++++++++++----- 2 files changed, 111 insertions(+), 22 deletions(-) diff --git a/bindings/nodejs/src/connection.rs b/bindings/nodejs/src/connection.rs index 23fbd80b5..1df4030f9 100644 --- a/bindings/nodejs/src/connection.rs +++ b/bindings/nodejs/src/connection.rs @@ -207,7 +207,7 @@ impl Connection { /// import glaredb from "@glaredb/glaredb" /// /// let con = glaredb.connect() - /// let cursor = await con.sql('from my_table | take 1'); + /// let cursor = await con.prql('from my_table | take 1'); /// await cursor.show() /// ``` /// diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 7363a7e27..bb20a231f 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -1,19 +1,20 @@ use std::collections::HashMap; use std::path::PathBuf; +use std::pin::Pin; use std::sync::Arc; use datafusion::arrow::array::{StringArray, UInt64Array}; use datafusion::arrow::datatypes::{DataType, Field, Schema}; -use datafusion::arrow::record_batch::RecordBatch; -use datafusion::error::DataFusionError; -use datafusion::logical_expr::LogicalPlan; -use datafusion::physical_plan::stream::RecordBatchStreamAdapter; // public re-export so downstream users of this package don't have to // directly depend on DF (and our version no-less) to use our interfaces. 
+pub use datafusion::arrow::record_batch::RecordBatch; +pub use datafusion::error::DataFusionError; +use datafusion::logical_expr::LogicalPlan; +use datafusion::physical_plan::stream::RecordBatchStreamAdapter; pub use datafusion::physical_plan::SendableRecordBatchStream; use derive_builder::Builder; use futures::lock::Mutex; -use futures::stream::StreamExt; +use futures::stream::{Stream, StreamExt}; use sqlexec::engine::{Engine, EngineBackend, TrackedSession}; use sqlexec::errors::ExecError; use sqlexec::remote::client::RemoteClientType; @@ -125,22 +126,36 @@ pub struct Connection { _engine: Arc, } +pub struct RecordStream(Pin> + Send>>); + +impl Into for SendableRecordBatchStream { + fn into(self) -> RecordStream { + RecordStream(self.boxed()) + } +} + +impl RecordStream { + pub fn all(&mut self) -> Result, DataFusionError> { + futures::executor::block_on(async move { + let mut out = Vec::new(); + + while let Some(batch) = self.0.next().await { + out.push(batch?) + } + Ok(out) + }) + } +} + + impl Connection { - // TODO: - // - decide if we want to actually return the DF Sendable types - // (putting the schema in the wrapper is annoying and it - // doesn't get us much, but a sendable) stream is nice. - // - do we want to have sync methods that return - // Stream> (e.g. make it fully lazy - // and flatten errors) or Iterator>? - // Both of these seem useful in some (many?) cases. - // - prql helper. - // - regardless, wrapping/aliasing SRBS values in some way so - // that we can provide helper methods (exhaust, converters(?)) - // might be good. - pub async fn execute(&self, query: &str) -> Result { + pub async fn execute( + &self, + query: impl Into, + ) -> Result { let mut ses = self.session.lock().await; - let plan = ses.create_logical_plan(query).await?; + let query = query.into(); + let plan = ses.create_logical_plan(&query).await?; let op = OperationInfo::new().with_query_text(query); Ok(Self::process_result( @@ -148,9 +163,35 @@ impl Connection { )) } - pub async fn query(&self, query: &str) -> Result { + pub fn call(&self, query: impl Into) -> RecordStream { + let ses = self.session.clone(); + let query = query.into(); + + RecordStream(Box::pin( + futures::stream::once(async move { + let mut ses = ses.lock().await; + let plan = match ses.create_logical_plan(&query).await { + Ok(p) => p, + Err(e) => return Self::handle_error(e), + }; + let op = OperationInfo::new().with_query_text(query); + + match ses.execute_logical_plan(plan, &op).await { + Ok((_, stream)) => Self::process_result(stream.into()), + Err(err) => Self::handle_error(err), + } + }) + .flatten(), + )) + } + + pub async fn query( + &self, + query: impl Into, + ) -> Result { let mut ses = self.session.lock().await; - let plan = ses.create_logical_plan(query).await?; + let query = query.into(); + let plan = ses.create_logical_plan(&query).await?; let op = OperationInfo::new().with_query_text(query); match plan.to_owned().try_into_datafusion_plan()? 
{ @@ -180,6 +221,54 @@ impl Connection { } } + pub async fn prql_query( + &self, + query: impl Into, + ) -> Result { + let mut ses = self.session.lock().await; + let query = query.into(); + let plan = ses.prql_to_lp(&query).await?; + let op = OperationInfo::new().with_query_text(query); + + let ses_clone = self.session.clone(); + Ok(Self::process_result(ExecutionResult::Query { + stream: Box::pin(RecordBatchStreamAdapter::new( + Arc::new(plan.output_schema().unwrap_or_else(|| Schema::empty())), + futures::stream::once(async move { + let mut ses = ses_clone.lock().await; + match ses.execute_logical_plan(plan, &op).await { + Ok((_, res)) => Self::process_result(res), + Err(e) => Self::handle_error(e), + } + }) + .flatten(), + )), + })) + } + + pub fn prql_call(&self, query: impl Into) -> RecordStream { + let ses = self.session.clone(); + let query = query.into(); + + RecordStream(Box::pin( + futures::stream::once(async move { + let mut ses = ses.lock().await; + let plan = match ses.prql_to_lp(&query).await { + Ok(p) => p, + Err(e) => return Self::handle_error(e), + }; + let op = OperationInfo::new().with_query_text(query); + + match ses.execute_logical_plan(plan, &op).await { + Ok((_, stream)) => Self::process_result(stream.into()), + Err(err) => Self::handle_error(err), + } + }) + .flatten(), + )) + } + + fn handle_error(err: impl Into) -> SendableRecordBatchStream { Self::process_result(ExecutionResult::Error(err.into())) } From f143eeaaf5d683e1c50828abd56296668218af23 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 29 Mar 2024 14:37:20 -0400 Subject: [PATCH 14/34] feat: addtional converter --- crates/glaredb/src/lib.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index bb20a231f..49f728385 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -8,7 +8,7 @@ use datafusion::arrow::datatypes::{DataType, Field, Schema}; // public re-export so downstream users of this package don't have to // directly depend on DF (and our version no-less) to use our interfaces. 
pub use datafusion::arrow::record_batch::RecordBatch; -pub use datafusion::error::DataFusionError; +use datafusion::error::DataFusionError; use datafusion::logical_expr::LogicalPlan; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; pub use datafusion::physical_plan::SendableRecordBatchStream; @@ -134,6 +134,15 @@ impl Into for SendableRecordBatchStream { } } +impl Into for Result { + fn into(self) -> RecordStream { + match self { + Ok(stream) => stream.into(), + Err(err) => RecordStream(Connection::handle_error(err).boxed()), + } + } +} + impl RecordStream { pub fn all(&mut self) -> Result, DataFusionError> { futures::executor::block_on(async move { From a34f5db38508c62d183266783946bf3866e873d5 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 29 Mar 2024 15:00:22 -0400 Subject: [PATCH 15/34] chore: reorganization --- crates/glaredb/src/lib.rs | 47 ++++++++++++++------------------------- 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 49f728385..cb49fb6cc 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -126,36 +126,6 @@ pub struct Connection { _engine: Arc, } -pub struct RecordStream(Pin> + Send>>); - -impl Into for SendableRecordBatchStream { - fn into(self) -> RecordStream { - RecordStream(self.boxed()) - } -} - -impl Into for Result { - fn into(self) -> RecordStream { - match self { - Ok(stream) => stream.into(), - Err(err) => RecordStream(Connection::handle_error(err).boxed()), - } - } -} - -impl RecordStream { - pub fn all(&mut self) -> Result, DataFusionError> { - futures::executor::block_on(async move { - let mut out = Vec::new(); - - while let Some(batch) = self.0.next().await { - out.push(batch?) - } - Ok(out) - }) - } -} - impl Connection { pub async fn execute( @@ -346,3 +316,20 @@ impl Connection { )) } } + +pub struct RecordStream(Pin> + Send>>); + +impl Into for SendableRecordBatchStream { + fn into(self) -> RecordStream { + RecordStream(self.boxed()) + } +} + +impl Into for Result { + fn into(self) -> RecordStream { + match self { + Ok(stream) => stream.into(), + Err(err) => RecordStream(Connection::handle_error(err).boxed()), + } + } +} From 6439f7e18f5c2bfad03157c60cae3b392bb83b76 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 29 Mar 2024 15:19:03 -0400 Subject: [PATCH 16/34] fix: lint --- bindings/nodejs/src/connection.rs | 3 +-- bindings/python/src/connect.rs | 3 +-- crates/glaredb/src/lib.rs | 20 ++++++++++---------- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/bindings/nodejs/src/connection.rs b/bindings/nodejs/src/connection.rs index 1df4030f9..7867eda52 100644 --- a/bindings/nodejs/src/connection.rs +++ b/bindings/nodejs/src/connection.rs @@ -5,9 +5,8 @@ use std::sync::Arc; use datafusion::logical_expr::LogicalPlan as DFLogicalPlan; use datafusion_ext::vars::SessionVars; use futures::lock::Mutex; -use ioutil::ensure_dir; use sqlexec::engine::{Engine, EngineBackend, SessionStorageConfig, TrackedSession}; -use sqlexec::remote::client::{RemoteClient, RemoteClientType}; +use sqlexec::remote::client::RemoteClientType; use sqlexec::{LogicalPlan, OperationInfo}; use url::Url; diff --git a/bindings/python/src/connect.rs b/bindings/python/src/connect.rs index e29f717f3..5043c3bfd 100644 --- a/bindings/python/src/connect.rs +++ b/bindings/python/src/connect.rs @@ -7,10 +7,9 @@ use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; -use datafusion_ext::vars::SessionVars; use futures::lock::Mutex; use 
pyo3::prelude::*; -use sqlexec::engine::{Engine, EngineBackend, SessionStorageConfig}; +use sqlexec::engine::{Engine, EngineBackend}; use sqlexec::remote::client::RemoteClientType; use url::Url; diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index cb49fb6cc..9ec825b5d 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -156,7 +156,7 @@ impl Connection { let op = OperationInfo::new().with_query_text(query); match ses.execute_logical_plan(plan, &op).await { - Ok((_, stream)) => Self::process_result(stream.into()), + Ok((_, stream)) => Self::process_result(stream), Err(err) => Self::handle_error(err), } }) @@ -185,7 +185,7 @@ impl Connection { Ok(Self::process_result(ExecutionResult::Query { stream: Box::pin(RecordBatchStreamAdapter::new( - Arc::new(plan.output_schema().unwrap_or_else(|| Schema::empty())), + Arc::new(plan.output_schema().unwrap_or_else(Schema::empty)), futures::stream::once(async move { let mut ses = ses_clone.lock().await; match ses.execute_logical_plan(plan, &op).await { @@ -212,7 +212,7 @@ impl Connection { let ses_clone = self.session.clone(); Ok(Self::process_result(ExecutionResult::Query { stream: Box::pin(RecordBatchStreamAdapter::new( - Arc::new(plan.output_schema().unwrap_or_else(|| Schema::empty())), + Arc::new(plan.output_schema().unwrap_or_else(Schema::empty)), futures::stream::once(async move { let mut ses = ses_clone.lock().await; match ses.execute_logical_plan(plan, &op).await { @@ -239,7 +239,7 @@ impl Connection { let op = OperationInfo::new().with_query_text(query); match ses.execute_logical_plan(plan, &op).await { - Ok((_, stream)) => Self::process_result(stream.into()), + Ok((_, stream)) => Self::process_result(stream), Err(err) => Self::handle_error(err), } }) @@ -319,15 +319,15 @@ impl Connection { pub struct RecordStream(Pin> + Send>>); -impl Into for SendableRecordBatchStream { - fn into(self) -> RecordStream { - RecordStream(self.boxed()) +impl From for RecordStream { + fn from(val: SendableRecordBatchStream) -> RecordStream { + RecordStream(val.boxed()) } } -impl Into for Result { - fn into(self) -> RecordStream { - match self { +impl From> for RecordStream { + fn from(val: Result) -> RecordStream { + match val { Ok(stream) => stream.into(), Err(err) => RecordStream(Connection::handle_error(err).boxed()), } From 14c7a61961ff33f00513209b4a13c9ab1f60f1dd Mon Sep 17 00:00:00 2001 From: tycho garen Date: Thu, 4 Apr 2024 20:47:16 -0400 Subject: [PATCH 17/34] chore: backport driver changes from python integration branch --- crates/glaredb/src/lib.rs | 35 +++++++++++++++++++++-------- crates/sqlexec/src/context/local.rs | 8 +++---- crates/sqlexec/src/remote/client.rs | 9 ++++++++ crates/sqlexec/src/session.rs | 2 +- 4 files changed, 40 insertions(+), 14 deletions(-) diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 9ec825b5d..456077a83 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -8,7 +8,7 @@ use datafusion::arrow::datatypes::{DataType, Field, Schema}; // public re-export so downstream users of this package don't have to // directly depend on DF (and our version no-less) to use our interfaces. 
pub use datafusion::arrow::record_batch::RecordBatch; -use datafusion::error::DataFusionError; +pub use datafusion::error::DataFusionError; use datafusion::logical_expr::LogicalPlan; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; pub use datafusion::physical_plan::SendableRecordBatchStream; @@ -16,6 +16,7 @@ use derive_builder::Builder; use futures::lock::Mutex; use futures::stream::{Stream, StreamExt}; use sqlexec::engine::{Engine, EngineBackend, TrackedSession}; +use sqlexec::environment::EnvironmentReader; use sqlexec::errors::ExecError; use sqlexec::remote::client::RemoteClientType; use sqlexec::session::ExecutionResult; @@ -25,16 +26,16 @@ use url::Url; #[derive(Default, Builder)] pub struct ConnectOptions { - #[builder(setter(into, strip_option))] + #[builder(setter(into))] pub connection_target: Option, - #[builder(setter(into, strip_option))] + #[builder(setter(into))] pub location: Option, - #[builder(setter(into, strip_option))] + #[builder(setter(into))] pub spill_path: Option, - #[builder(setter(strip_option))] + #[builder(setter(into), default = "HashMap::new()")] pub storage_options: HashMap, - #[builder] + #[builder(setter(strip_option))] pub disable_tls: Option, #[builder(default = "Some(\"https://console.glaredb.com\".to_string())")] #[builder(setter(into, strip_option))] @@ -42,10 +43,12 @@ pub struct ConnectOptions { #[builder(default = "Some(RemoteClientType::Cli)")] #[builder(setter(strip_option))] pub client_type: Option, + #[builder(setter(strip_option))] + pub environment_reader: Option>>, } impl ConnectOptionsBuilder { - pub fn set_storage_option( + pub fn storage_option( &mut self, key: impl Into, value: impl Into, @@ -57,6 +60,11 @@ impl ConnectOptionsBuilder { opts.insert(key.into(), value.into()); self.storage_options(opts) } + + pub fn set_storage_options(&mut self, opts: Option>) -> &mut Self { + self.storage_options = opts; + self + } } impl ConnectOptions { @@ -68,7 +76,12 @@ impl ConnectOptions { } } - pub async fn connect(&self) -> Result { + pub fn with_env_reader(&mut self, reader: Arc>) -> &mut Self { + self.environment_reader.replace(reader); + self + } + + pub async fn connect(&mut self) -> Result { let mut engine = Engine::from_backend(self.backend()).await?; engine = engine.with_spill_path(self.spill_path.clone().map(|p| p.into()))?; @@ -80,11 +93,15 @@ impl ConnectOptions { self.cloud_url(), self.cloud_addr.clone().unwrap_or_default(), self.disable_tls.unwrap_or_default(), - self.client_type.clone().unwrap(), + self.client_type.clone().unwrap_or_default(), None, ) .await?; + if let Some(env_reader) = self.environment_reader.take() { + session.register_env_reader(env_reader) + } + Ok(Connection { session: Arc::new(Mutex::new(session)), _engine: Arc::new(engine), diff --git a/crates/sqlexec/src/context/local.rs b/crates/sqlexec/src/context/local.rs index f6609d052..1814a290e 100644 --- a/crates/sqlexec/src/context/local.rs +++ b/crates/sqlexec/src/context/local.rs @@ -73,7 +73,7 @@ pub struct LocalSessionContext { /// Datafusion session context used for planning and execution. df_ctx: DfSessionContext, /// Read tables from the environment. - env_reader: Option>, + env_reader: Option>>, /// Task scheduler. task_scheduler: Scheduler, /// Notices that should be sent to the user. 
@@ -217,12 +217,12 @@ impl LocalSessionContext { Ok(()) } - pub fn register_env_reader(&mut self, env_reader: Box) { + pub fn register_env_reader(&mut self, env_reader: Arc>) { self.env_reader = Some(env_reader); } - pub fn get_env_reader(&self) -> Option<&dyn EnvironmentReader> { - self.env_reader.as_deref() + pub fn get_env_reader(&self) -> Option>> { + self.env_reader.clone() } pub fn get_metrics_handler(&self) -> SessionMetricsHandler { diff --git a/crates/sqlexec/src/remote/client.rs b/crates/sqlexec/src/remote/client.rs index e42b5ffca..3eeeac452 100644 --- a/crates/sqlexec/src/remote/client.rs +++ b/crates/sqlexec/src/remote/client.rs @@ -127,13 +127,22 @@ pub enum RemoteClientType { Cli, Node, Python, + Rust, } + +impl Default for RemoteClientType { + fn default() -> Self { + Self::Rust + } +} + impl fmt::Display for RemoteClientType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { RemoteClientType::Cli => write!(f, "cli"), RemoteClientType::Node => write!(f, "node"), RemoteClientType::Python => write!(f, "python"), + RemoteClientType::Rust => write!(f, "Rust"), } } } diff --git a/crates/sqlexec/src/session.rs b/crates/sqlexec/src/session.rs index e364a9a08..8e794b1fb 100644 --- a/crates/sqlexec/src/session.rs +++ b/crates/sqlexec/src/session.rs @@ -466,7 +466,7 @@ impl Session { self.ctx.get_session_catalog() } - pub fn register_env_reader(&mut self, env_reader: Box) { + pub fn register_env_reader(&mut self, env_reader: Arc>) { self.ctx.register_env_reader(env_reader); } From 74ef043b2160846d11900aca6e9709f85f55fcd5 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 5 Apr 2024 15:40:58 -0400 Subject: [PATCH 18/34] fix: backport additional features from binding integration --- crates/glaredb/src/lib.rs | 261 ++++++++++++++++++++++---------------- 1 file changed, 154 insertions(+), 107 deletions(-) diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 456077a83..9a7bd10bd 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -1,7 +1,9 @@ use std::collections::HashMap; +use std::fmt::Debug; use std::path::PathBuf; use std::pin::Pin; use std::sync::Arc; +use std::task::{Context, Poll}; use datafusion::arrow::array::{StringArray, UInt64Array}; use datafusion::arrow::datatypes::{DataType, Field, Schema}; @@ -138,71 +140,154 @@ impl ConnectOptions { } } +#[derive(Clone)] pub struct Connection { session: Arc>, _engine: Arc, } - impl Connection { - pub async fn execute( - &self, - query: impl Into, - ) -> Result { - let mut ses = self.session.lock().await; - let query = query.into(); - let plan = ses.create_logical_plan(&query).await?; - let op = OperationInfo::new().with_query_text(query); - - Ok(Self::process_result( - ses.execute_logical_plan(plan, &op).await?.1, - )) + pub fn execute(&self, query: impl Into) -> Operation { + Operation { + op: OperationType::Execute, + query: query.into(), + conn: Arc::new(self.clone()), + schema: None, + } } - pub fn call(&self, query: impl Into) -> RecordStream { - let ses = self.session.clone(); - let query = query.into(); + pub fn sql(&self, query: impl Into) -> Operation { + Operation { + op: OperationType::Sql, + query: query.into(), + conn: Arc::new(self.clone()), + schema: None, + } + } - RecordStream(Box::pin( - futures::stream::once(async move { - let mut ses = ses.lock().await; - let plan = match ses.create_logical_plan(&query).await { - Ok(p) => p, - Err(e) => return Self::handle_error(e), - }; - let op = OperationInfo::new().with_query_text(query); - - match 
ses.execute_logical_plan(plan, &op).await { - Ok((_, stream)) => Self::process_result(stream), - Err(err) => Self::handle_error(err), - } - }) - .flatten(), - )) + pub fn prql(&self, query: impl Into) -> Operation { + Operation { + op: OperationType::Prql, + query: query.into(), + conn: Arc::new(self.clone()), + schema: None, + } } +} - pub async fn query( - &self, - query: impl Into, - ) -> Result { - let mut ses = self.session.lock().await; - let query = query.into(); - let plan = ses.create_logical_plan(&query).await?; - let op = OperationInfo::new().with_query_text(query); - - match plan.to_owned().try_into_datafusion_plan()? { - LogicalPlan::Dml(_) - | LogicalPlan::Ddl(_) - | LogicalPlan::Copy(_) - | LogicalPlan::Extension(_) => Ok(Self::process_result( - ses.execute_logical_plan(plan, &op).await?.1, - )), - _ => { - let ses_clone = self.session.clone(); +pub struct RecordStream(Pin> + Send>>); + +impl Stream for RecordStream { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.0.poll_next_unpin(cx) + } +} + +impl From for RecordStream { + fn from(val: SendableRecordBatchStream) -> Self { + RecordStream(val.boxed()) + } +} + +impl From> for RecordStream { + fn from(val: Result) -> Self { + match val { + Ok(stream) => stream.into(), + Err(err) => RecordStream(Operation::handle_error(err).boxed()), + } + } +} + +impl From> for RecordStream { + fn from(val: Result) -> Self { + match val { + Ok(stream) => stream.into(), + Err(err) => RecordStream(Operation::handle_error(err).boxed()), + } + } +} + +#[derive(Debug, Clone)] +enum OperationType { + Sql, + Prql, + Execute, +} + +#[derive(Clone)] +#[must_use = "operations do nothing unless call() or execute() run"] +pub struct Operation { + op: OperationType, + query: String, + conn: Arc, + schema: Option>, +} + +impl Debug for Operation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Operation<{:?}>(\"{:?}\")", self.op, self.query) + } +} +impl Operation { + pub fn schema(&self) -> Option> { + self.schema.clone() + } + + pub async fn execute(&mut self) -> Result { + match self.op { + OperationType::Sql => { + let mut ses = self.conn.session.lock().await; + let plan = ses.create_logical_plan(&self.query).await?; + let op = OperationInfo::new().with_query_text(self.query.clone()); + let schema = self.schema.insert( + plan.output_schema() + .map(Arc::new) + .unwrap_or_else(|| Arc::new(Schema::empty())), + ); + + match plan.to_owned().try_into_datafusion_plan()? 
{ + LogicalPlan::Dml(_) + | LogicalPlan::Ddl(_) + | LogicalPlan::Copy(_) + | LogicalPlan::Extension(_) => Ok(Self::process_result( + ses.execute_logical_plan(plan, &op).await?.1, + )), + _ => { + let ses_clone = self.conn.session.clone(); + + Ok(Self::process_result(ExecutionResult::Query { + stream: Box::pin(RecordBatchStreamAdapter::new( + schema.clone(), + futures::stream::once(async move { + let mut ses = ses_clone.lock().await; + match ses.execute_logical_plan(plan, &op).await { + Ok((_, res)) => Self::process_result(res), + Err(e) => Self::handle_error(e), + } + }) + .flatten(), + )), + })) + } + } + } + OperationType::Prql => { + let mut ses = self.conn.session.lock().await; + let plan = ses.prql_to_lp(&self.query).await?; + let op = OperationInfo::new().with_query_text(self.query.clone()); + let schema = self.schema.insert( + plan.output_schema() + .map(Arc::new) + .unwrap_or_else(|| Arc::new(Schema::empty())), + ); + + let ses_clone = self.conn.session.clone(); Ok(Self::process_result(ExecutionResult::Query { stream: Box::pin(RecordBatchStreamAdapter::new( - Arc::new(plan.output_schema().unwrap_or_else(Schema::empty)), + schema.clone(), futures::stream::once(async move { let mut ses = ses_clone.lock().await; match ses.execute_logical_plan(plan, &op).await { @@ -214,57 +299,36 @@ impl Connection { )), })) } + OperationType::Execute => { + let mut ses = self.conn.session.lock().await; + let plan = ses.create_logical_plan(&self.query).await?; + let op = OperationInfo::new().with_query_text(self.query.clone()); + let _ = self.schema.insert( + plan.output_schema() + .map(Arc::new) + .unwrap_or_else(|| Arc::new(Schema::empty())), + ); + + Ok(Self::process_result( + ses.execute_logical_plan(plan, &op).await?.1, + )) + } } } - pub async fn prql_query( - &self, - query: impl Into, - ) -> Result { - let mut ses = self.session.lock().await; - let query = query.into(); - let plan = ses.prql_to_lp(&query).await?; - let op = OperationInfo::new().with_query_text(query); - - let ses_clone = self.session.clone(); - Ok(Self::process_result(ExecutionResult::Query { - stream: Box::pin(RecordBatchStreamAdapter::new( - Arc::new(plan.output_schema().unwrap_or_else(Schema::empty)), - futures::stream::once(async move { - let mut ses = ses_clone.lock().await; - match ses.execute_logical_plan(plan, &op).await { - Ok((_, res)) => Self::process_result(res), - Err(e) => Self::handle_error(e), - } - }) - .flatten(), - )), - })) - } - - pub fn prql_call(&self, query: impl Into) -> RecordStream { - let ses = self.session.clone(); - let query = query.into(); - + pub fn call(&mut self) -> RecordStream { + let mut op = self.clone(); RecordStream(Box::pin( futures::stream::once(async move { - let mut ses = ses.lock().await; - let plan = match ses.prql_to_lp(&query).await { - Ok(p) => p, - Err(e) => return Self::handle_error(e), - }; - let op = OperationInfo::new().with_query_text(query); - - match ses.execute_logical_plan(plan, &op).await { - Ok((_, stream)) => Self::process_result(stream), + match op.execute().await { Err(err) => Self::handle_error(err), + Ok(stream) => stream, } }) .flatten(), )) } - fn handle_error(err: impl Into) -> SendableRecordBatchStream { Self::process_result(ExecutionResult::Error(err.into())) } @@ -333,20 +397,3 @@ impl Connection { )) } } - -pub struct RecordStream(Pin> + Send>>); - -impl From for RecordStream { - fn from(val: SendableRecordBatchStream) -> RecordStream { - RecordStream(val.boxed()) - } -} - -impl From> for RecordStream { - fn from(val: Result) -> RecordStream { - match 
val { - Ok(stream) => stream.into(), - Err(err) => RecordStream(Connection::handle_error(err).boxed()), - } - } -} From b47b1a6734e7f429927a77b84c8179cf7774b1b5 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 5 Apr 2024 15:42:50 -0400 Subject: [PATCH 19/34] Revert "chore: backport driver changes from python integration branch" This reverts commit 14c7a61961ff33f00513209b4a13c9ab1f60f1dd. --- crates/glaredb/src/lib.rs | 35 ++++++++--------------------- crates/sqlexec/src/context/local.rs | 8 +++---- crates/sqlexec/src/remote/client.rs | 9 -------- crates/sqlexec/src/session.rs | 2 +- 4 files changed, 14 insertions(+), 40 deletions(-) diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 9a7bd10bd..0968f7502 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -10,7 +10,7 @@ use datafusion::arrow::datatypes::{DataType, Field, Schema}; // public re-export so downstream users of this package don't have to // directly depend on DF (and our version no-less) to use our interfaces. pub use datafusion::arrow::record_batch::RecordBatch; -pub use datafusion::error::DataFusionError; +use datafusion::error::DataFusionError; use datafusion::logical_expr::LogicalPlan; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; pub use datafusion::physical_plan::SendableRecordBatchStream; @@ -18,7 +18,6 @@ use derive_builder::Builder; use futures::lock::Mutex; use futures::stream::{Stream, StreamExt}; use sqlexec::engine::{Engine, EngineBackend, TrackedSession}; -use sqlexec::environment::EnvironmentReader; use sqlexec::errors::ExecError; use sqlexec::remote::client::RemoteClientType; use sqlexec::session::ExecutionResult; @@ -28,16 +27,16 @@ use url::Url; #[derive(Default, Builder)] pub struct ConnectOptions { - #[builder(setter(into))] + #[builder(setter(into, strip_option))] pub connection_target: Option, - #[builder(setter(into))] + #[builder(setter(into, strip_option))] pub location: Option, - #[builder(setter(into))] + #[builder(setter(into, strip_option))] pub spill_path: Option, - #[builder(setter(into), default = "HashMap::new()")] + #[builder(setter(strip_option))] pub storage_options: HashMap, - #[builder(setter(strip_option))] + #[builder] pub disable_tls: Option, #[builder(default = "Some(\"https://console.glaredb.com\".to_string())")] #[builder(setter(into, strip_option))] @@ -45,12 +44,10 @@ pub struct ConnectOptions { #[builder(default = "Some(RemoteClientType::Cli)")] #[builder(setter(strip_option))] pub client_type: Option, - #[builder(setter(strip_option))] - pub environment_reader: Option>>, } impl ConnectOptionsBuilder { - pub fn storage_option( + pub fn set_storage_option( &mut self, key: impl Into, value: impl Into, @@ -62,11 +59,6 @@ impl ConnectOptionsBuilder { opts.insert(key.into(), value.into()); self.storage_options(opts) } - - pub fn set_storage_options(&mut self, opts: Option>) -> &mut Self { - self.storage_options = opts; - self - } } impl ConnectOptions { @@ -78,12 +70,7 @@ impl ConnectOptions { } } - pub fn with_env_reader(&mut self, reader: Arc>) -> &mut Self { - self.environment_reader.replace(reader); - self - } - - pub async fn connect(&mut self) -> Result { + pub async fn connect(&self) -> Result { let mut engine = Engine::from_backend(self.backend()).await?; engine = engine.with_spill_path(self.spill_path.clone().map(|p| p.into()))?; @@ -95,15 +82,11 @@ impl ConnectOptions { self.cloud_url(), self.cloud_addr.clone().unwrap_or_default(), self.disable_tls.unwrap_or_default(), - 
self.client_type.clone().unwrap_or_default(), + self.client_type.clone().unwrap(), None, ) .await?; - if let Some(env_reader) = self.environment_reader.take() { - session.register_env_reader(env_reader) - } - Ok(Connection { session: Arc::new(Mutex::new(session)), _engine: Arc::new(engine), diff --git a/crates/sqlexec/src/context/local.rs b/crates/sqlexec/src/context/local.rs index 1814a290e..f6609d052 100644 --- a/crates/sqlexec/src/context/local.rs +++ b/crates/sqlexec/src/context/local.rs @@ -73,7 +73,7 @@ pub struct LocalSessionContext { /// Datafusion session context used for planning and execution. df_ctx: DfSessionContext, /// Read tables from the environment. - env_reader: Option>>, + env_reader: Option>, /// Task scheduler. task_scheduler: Scheduler, /// Notices that should be sent to the user. @@ -217,12 +217,12 @@ impl LocalSessionContext { Ok(()) } - pub fn register_env_reader(&mut self, env_reader: Arc>) { + pub fn register_env_reader(&mut self, env_reader: Box) { self.env_reader = Some(env_reader); } - pub fn get_env_reader(&self) -> Option>> { - self.env_reader.clone() + pub fn get_env_reader(&self) -> Option<&dyn EnvironmentReader> { + self.env_reader.as_deref() } pub fn get_metrics_handler(&self) -> SessionMetricsHandler { diff --git a/crates/sqlexec/src/remote/client.rs b/crates/sqlexec/src/remote/client.rs index 3eeeac452..e42b5ffca 100644 --- a/crates/sqlexec/src/remote/client.rs +++ b/crates/sqlexec/src/remote/client.rs @@ -127,22 +127,13 @@ pub enum RemoteClientType { Cli, Node, Python, - Rust, } - -impl Default for RemoteClientType { - fn default() -> Self { - Self::Rust - } -} - impl fmt::Display for RemoteClientType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { RemoteClientType::Cli => write!(f, "cli"), RemoteClientType::Node => write!(f, "node"), RemoteClientType::Python => write!(f, "python"), - RemoteClientType::Rust => write!(f, "Rust"), } } } diff --git a/crates/sqlexec/src/session.rs b/crates/sqlexec/src/session.rs index 8e794b1fb..e364a9a08 100644 --- a/crates/sqlexec/src/session.rs +++ b/crates/sqlexec/src/session.rs @@ -466,7 +466,7 @@ impl Session { self.ctx.get_session_catalog() } - pub fn register_env_reader(&mut self, env_reader: Arc>) { + pub fn register_env_reader(&mut self, env_reader: Box) { self.ctx.register_env_reader(env_reader); } From cbeef5813d4f18f347a0eefdf711c29ad7416eed Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 5 Apr 2024 15:46:12 -0400 Subject: [PATCH 20/34] chore: backport from driver branch --- crates/glaredb/src/lib.rs | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 0968f7502..9a7bd10bd 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -10,7 +10,7 @@ use datafusion::arrow::datatypes::{DataType, Field, Schema}; // public re-export so downstream users of this package don't have to // directly depend on DF (and our version no-less) to use our interfaces. 
pub use datafusion::arrow::record_batch::RecordBatch; -use datafusion::error::DataFusionError; +pub use datafusion::error::DataFusionError; use datafusion::logical_expr::LogicalPlan; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; pub use datafusion::physical_plan::SendableRecordBatchStream; @@ -18,6 +18,7 @@ use derive_builder::Builder; use futures::lock::Mutex; use futures::stream::{Stream, StreamExt}; use sqlexec::engine::{Engine, EngineBackend, TrackedSession}; +use sqlexec::environment::EnvironmentReader; use sqlexec::errors::ExecError; use sqlexec::remote::client::RemoteClientType; use sqlexec::session::ExecutionResult; @@ -27,16 +28,16 @@ use url::Url; #[derive(Default, Builder)] pub struct ConnectOptions { - #[builder(setter(into, strip_option))] + #[builder(setter(into))] pub connection_target: Option, - #[builder(setter(into, strip_option))] + #[builder(setter(into))] pub location: Option, - #[builder(setter(into, strip_option))] + #[builder(setter(into))] pub spill_path: Option, - #[builder(setter(strip_option))] + #[builder(setter(into), default = "HashMap::new()")] pub storage_options: HashMap, - #[builder] + #[builder(setter(strip_option))] pub disable_tls: Option, #[builder(default = "Some(\"https://console.glaredb.com\".to_string())")] #[builder(setter(into, strip_option))] @@ -44,10 +45,12 @@ pub struct ConnectOptions { #[builder(default = "Some(RemoteClientType::Cli)")] #[builder(setter(strip_option))] pub client_type: Option, + #[builder(setter(strip_option))] + pub environment_reader: Option>>, } impl ConnectOptionsBuilder { - pub fn set_storage_option( + pub fn storage_option( &mut self, key: impl Into, value: impl Into, @@ -59,6 +62,11 @@ impl ConnectOptionsBuilder { opts.insert(key.into(), value.into()); self.storage_options(opts) } + + pub fn set_storage_options(&mut self, opts: Option>) -> &mut Self { + self.storage_options = opts; + self + } } impl ConnectOptions { @@ -70,7 +78,12 @@ impl ConnectOptions { } } - pub async fn connect(&self) -> Result { + pub fn with_env_reader(&mut self, reader: Arc>) -> &mut Self { + self.environment_reader.replace(reader); + self + } + + pub async fn connect(&mut self) -> Result { let mut engine = Engine::from_backend(self.backend()).await?; engine = engine.with_spill_path(self.spill_path.clone().map(|p| p.into()))?; @@ -82,11 +95,15 @@ impl ConnectOptions { self.cloud_url(), self.cloud_addr.clone().unwrap_or_default(), self.disable_tls.unwrap_or_default(), - self.client_type.clone().unwrap(), + self.client_type.clone().unwrap_or_default(), None, ) .await?; + if let Some(env_reader) = self.environment_reader.take() { + session.register_env_reader(env_reader) + } + Ok(Connection { session: Arc::new(Mutex::new(session)), _engine: Arc::new(engine), From b994e35620b7ad89e446eee7c548030cf993f9e7 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 5 Apr 2024 16:00:33 -0400 Subject: [PATCH 21/34] chore: port in sqlexec changes --- crates/sqlexec/src/context/local.rs | 8 ++++---- crates/sqlexec/src/remote/client.rs | 9 +++++++++ crates/sqlexec/src/session.rs | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/crates/sqlexec/src/context/local.rs b/crates/sqlexec/src/context/local.rs index f6609d052..1814a290e 100644 --- a/crates/sqlexec/src/context/local.rs +++ b/crates/sqlexec/src/context/local.rs @@ -73,7 +73,7 @@ pub struct LocalSessionContext { /// Datafusion session context used for planning and execution. df_ctx: DfSessionContext, /// Read tables from the environment. 
- env_reader: Option>, + env_reader: Option>>, /// Task scheduler. task_scheduler: Scheduler, /// Notices that should be sent to the user. @@ -217,12 +217,12 @@ impl LocalSessionContext { Ok(()) } - pub fn register_env_reader(&mut self, env_reader: Box) { + pub fn register_env_reader(&mut self, env_reader: Arc>) { self.env_reader = Some(env_reader); } - pub fn get_env_reader(&self) -> Option<&dyn EnvironmentReader> { - self.env_reader.as_deref() + pub fn get_env_reader(&self) -> Option>> { + self.env_reader.clone() } pub fn get_metrics_handler(&self) -> SessionMetricsHandler { diff --git a/crates/sqlexec/src/remote/client.rs b/crates/sqlexec/src/remote/client.rs index e42b5ffca..30ad2d885 100644 --- a/crates/sqlexec/src/remote/client.rs +++ b/crates/sqlexec/src/remote/client.rs @@ -127,13 +127,22 @@ pub enum RemoteClientType { Cli, Node, Python, + Rust, } + +impl Default for RemoteClientType { + fn default() -> Self { + Self::Rust + } +} + impl fmt::Display for RemoteClientType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { RemoteClientType::Cli => write!(f, "cli"), RemoteClientType::Node => write!(f, "node"), RemoteClientType::Python => write!(f, "python"), + RemoteClientType::Rust => write!(f, "rust"), } } } diff --git a/crates/sqlexec/src/session.rs b/crates/sqlexec/src/session.rs index e364a9a08..8e794b1fb 100644 --- a/crates/sqlexec/src/session.rs +++ b/crates/sqlexec/src/session.rs @@ -466,7 +466,7 @@ impl Session { self.ctx.get_session_catalog() } - pub fn register_env_reader(&mut self, env_reader: Box) { + pub fn register_env_reader(&mut self, env_reader: Arc>) { self.ctx.register_env_reader(env_reader); } From 7bebf63baf886480e1eb5ae511cf791d8251b419 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 5 Apr 2024 17:22:15 -0400 Subject: [PATCH 22/34] chore: backport crates changes --- crates/glaredb/src/lib.rs | 9 +++------ crates/sqlexec/src/context/local.rs | 4 ++-- crates/sqlexec/src/session.rs | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 9a7bd10bd..8d26f066d 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -79,11 +79,11 @@ impl ConnectOptions { } pub fn with_env_reader(&mut self, reader: Arc>) -> &mut Self { - self.environment_reader.replace(reader); + self.environment_reader = Some(reader); self } - pub async fn connect(&mut self) -> Result { + pub async fn connect(&self) -> Result { let mut engine = Engine::from_backend(self.backend()).await?; engine = engine.with_spill_path(self.spill_path.clone().map(|p| p.into()))?; @@ -99,10 +99,7 @@ impl ConnectOptions { None, ) .await?; - - if let Some(env_reader) = self.environment_reader.take() { - session.register_env_reader(env_reader) - } + session.register_env_reader(self.environment_reader.clone()); Ok(Connection { session: Arc::new(Mutex::new(session)), diff --git a/crates/sqlexec/src/context/local.rs b/crates/sqlexec/src/context/local.rs index 1814a290e..7973835e5 100644 --- a/crates/sqlexec/src/context/local.rs +++ b/crates/sqlexec/src/context/local.rs @@ -217,8 +217,8 @@ impl LocalSessionContext { Ok(()) } - pub fn register_env_reader(&mut self, env_reader: Arc>) { - self.env_reader = Some(env_reader); + pub fn register_env_reader(&mut self, reader: Option>>) { + self.env_reader = reader; } pub fn get_env_reader(&self) -> Option>> { diff --git a/crates/sqlexec/src/session.rs b/crates/sqlexec/src/session.rs index 8e794b1fb..7f710eae6 100644 --- a/crates/sqlexec/src/session.rs +++ 
b/crates/sqlexec/src/session.rs @@ -466,7 +466,7 @@ impl Session { self.ctx.get_session_catalog() } - pub fn register_env_reader(&mut self, env_reader: Arc>) { + pub fn register_env_reader(&mut self, env_reader: Option>>) { self.ctx.register_env_reader(env_reader); } From 5d462185b5c3333cd005a8866f41f2bd6ed517c7 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Sun, 7 Apr 2024 08:33:42 -0400 Subject: [PATCH 23/34] fix: comple --- bindings/python/src/connect.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/python/src/connect.rs b/bindings/python/src/connect.rs index 5043c3bfd..23bceccf9 100644 --- a/bindings/python/src/connect.rs +++ b/bindings/python/src/connect.rs @@ -124,7 +124,7 @@ pub fn connect( .await .map_err(PyGlareDbError::from)?; - session.register_env_reader(Box::new(PyEnvironmentReader)); + session.register_env_reader(Some(Arc::new(Box::new(PyEnvironmentReader)))); Ok(Connection { session: Arc::new(Mutex::new(session)), From bd9f621ff138f918005a80ba31828dd2d31d3313 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Sun, 7 Apr 2024 15:36:47 -0400 Subject: [PATCH 24/34] THIS IS AN EMPTY COMMIT From b9b52b6f8bd4025c096a85831a96f408af64c306 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Sun, 7 Apr 2024 17:08:30 -0400 Subject: [PATCH 25/34] fix: non-deterministic http test --- testdata/sqllogictests/http.slt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/testdata/sqllogictests/http.slt b/testdata/sqllogictests/http.slt index 392960521..899aa8f5b 100644 --- a/testdata/sqllogictests/http.slt +++ b/testdata/sqllogictests/http.slt @@ -5,7 +5,10 @@ statement error Error during planning: missing file extension: http://host.com/ select * from 'http://host.com/path/*' # Querying a source without "Content-Length" information. +# TODO: this test is flaky as the source changes. Verify with: +# curl 'https://opdb.org/api/search/typeahead?q=*' | jq length +# query I SELECT count(*) FROM read_json('https://opdb.org/api/search/typeahead?q=*'); ---- -100 +99 From a65a831197372ec475f89ad6f0dbb6adcd150ece Mon Sep 17 00:00:00 2001 From: tycho garen Date: Mon, 8 Apr 2024 18:37:04 -0400 Subject: [PATCH 26/34] feat: documentation for Rust SDK crate --- crates/glaredb/src/lib.rs | 124 +++++++++++++++++++++++++++++++++++--- 1 file changed, 117 insertions(+), 7 deletions(-) diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 8d26f066d..a426cf372 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -1,3 +1,14 @@ +//! GlareDB is a database engine designed to provide the user +//! experience and ergonomic embedded databases with the compute power +//! and flexibility of large scale distributed serverless compute engines. +//! +//! The GlareDB Rust SDK is a set of high-level wrappers for a GlareDB +//! instance as either a client or an embedded database. The +//! implementation primarily underpins the implementations of the +//! Python and Node.JS bindings, but may be used/useful directly for +//! testing GlareDB from within Rust tests, and even inside of rust +//! applications or to produce other bindings. 
+ use std::collections::HashMap; use std::fmt::Debug; use std::path::PathBuf; @@ -18,38 +29,81 @@ use derive_builder::Builder; use futures::lock::Mutex; use futures::stream::{Stream, StreamExt}; use sqlexec::engine::{Engine, EngineBackend, TrackedSession}; -use sqlexec::environment::EnvironmentReader; +pub use sqlexec::environment::EnvironmentReader; use sqlexec::errors::ExecError; use sqlexec::remote::client::RemoteClientType; use sqlexec::session::ExecutionResult; use sqlexec::OperationInfo; use url::Url; - +/// ConnectionOptions are the set of options to configure a GlareDB +/// instance, and are an analogue to the commandline arguments to +/// produce a "running database". The ConnectOptionsBuilder provides a +/// chainable interface to construct these values and produce as +/// structure. You can construct `ConnectOptions` fully without the +/// builder interface, depending on your preference. +/// +/// The `.connect()` method on `ConnectionOptions` is the main way to +/// produce a GlareDB instance. All instances use an in-process +/// metastore (catalog). +/// +/// The `connection_target`, `location`, `spill_path` and +/// `storage_options` all control the behavior of a local, single node +/// instance, while the remaining options configure a GlareDB instance +/// for hybrid-execution. #[derive(Default, Builder)] pub struct ConnectOptions { + /// The connection_target specifies where the GlareDB instances + /// storage is. This is either, in memory (unspecified or + /// `memory://`), a path to something on the local file-system for + /// persistent local databases, or an object-store URL for + /// databases that are backed up onto cloud storage. #[builder(setter(into))] pub connection_target: Option, + /// Location is that path **within** the `connection_target` where + /// the database's files will be stored. This is required for all + /// object-store backed GlareDB instances and ignored in all other + /// cases. #[builder(setter(into))] pub location: Option, + /// Specifies the location on the local file system where this + /// process will write files so that it can spill data for + /// operations to local disk (sorts, large joins, etc.) #[builder(setter(into))] pub spill_path: Option, + /// Defines the options used to configure the object store + /// (credentials, etc.) #[builder(setter(into), default = "HashMap::new()")] pub storage_options: HashMap, + /// By default, the client will connect to the GlareDB service + /// using TLS. When this option is specified (and true), the this + /// GlareDB instance will establish an insecure connection. Use for + /// testing and development. #[builder(setter(strip_option))] pub disable_tls: Option, + /// Location of the GlareDB clout instance used by GlareDB + /// negotiate out-of-band certificate provisioning. #[builder(default = "Some(\"https://console.glaredb.com\".to_string())")] #[builder(setter(into, strip_option))] pub cloud_addr: Option, + /// Client type distinguishes what kind of remote client this is, + /// and is used for logging and introspection. #[builder(default = "Some(RemoteClientType::Cli)")] #[builder(setter(strip_option))] pub client_type: Option, + /// Specify an optional environment reader, which GlareDB uses in + /// embedded cases so that queries can bindings to extract tables + /// from variables in the binding's scope that data frames, or the + /// output of a query. 
#[builder(setter(strip_option))] pub environment_reader: Option>>, } impl ConnectOptionsBuilder { + /// Adds a single option (key/value pair) to the builder for the + /// storage options map. All keys must be unique, and setting the + /// same otpion more than once. pub fn storage_option( &mut self, key: impl Into, @@ -63,6 +117,9 @@ impl ConnectOptionsBuilder { self.storage_options(opts) } + /// Overrides the storage option map in the Builder. Passing an + /// empty map or None to this method removes the existing data and + /// resets the state of the storage options in the builder. pub fn set_storage_options(&mut self, opts: Option>) -> &mut Self { self.storage_options = opts; self @@ -70,6 +127,11 @@ impl ConnectOptionsBuilder { } impl ConnectOptions { + /// Constructs an in-memory connection, which can be used for + /// default operations and tests without impacting the file + /// system. All state (tables, catalog, etc,) are local, but these + /// instances can write data to files and process data in other + /// data sources. pub fn new_in_memory() -> Self { Self { location: None, @@ -78,11 +140,8 @@ impl ConnectOptions { } } - pub fn with_env_reader(&mut self, reader: Arc>) -> &mut Self { - self.environment_reader = Some(reader); - self - } - + /// Creates a Connection object according to the options + /// specified. pub async fn connect(&self) -> Result { let mut engine = Engine::from_backend(self.backend()).await?; @@ -137,6 +196,18 @@ impl ConnectOptions { } } +/// Connections hold the state of a GlareDB object. These connections +/// are not always backed by network connections, and in all cases +/// include the full capabilities of a local GlareDB instance. When +/// connected to a remote GlareDB instance, all execution is hybrid +/// wherein queries are parsed, planed and orchestrated locally, but +/// executation can occur locally or on the remote instance according +/// to capacity. +/// +/// All of the connection's operations are lazy, and return +/// `Operation` objects that must be executed in order for the query +/// to run. `Operation` objects can be executed more than once to +/// rerun the query. #[derive(Clone)] pub struct Connection { session: Arc>, @@ -144,6 +215,9 @@ pub struct Connection { } impl Connection { + /// Execute creates a query that is parsed and then evaluates and + /// runs immediately when the Operation is invoked regardless of + /// the content. pub fn execute(&self, query: impl Into) -> Operation { Operation { op: OperationType::Execute, @@ -153,6 +227,10 @@ impl Connection { } } + /// Creates a query that is parsed when the Operation is invoked; + /// however, the query is only executed when the results are + /// iterated _unless_ the operation is a write operation or a DDL + /// operation, which are executed when the operation is invoked. pub fn sql(&self, query: impl Into) -> Operation { Operation { op: OperationType::Sql, @@ -162,6 +240,10 @@ impl Connection { } } + /// PRQL queries have the same semantics as SQL queries; however, + /// because PRQL does not include syntax for DML or DDL + /// operations, these queries only run when the result stream are + /// invoked. pub fn prql(&self, query: impl Into) -> Operation { Operation { op: OperationType::Prql, @@ -172,6 +254,9 @@ impl Connection { } } +/// RecordStream is like DataFusion's `SendableRecordBatchStream`, +/// except it does not provide access to the schema except via the +/// results. 
pub struct RecordStream(Pin> + Send>>); impl Stream for RecordStream { @@ -206,10 +291,19 @@ impl From> for RecordStream { } } + #[derive(Debug, Clone)] enum OperationType { + /// SQL operations create a lazy operation that runs DDL/DML + /// operations directly, and executes other queries when the + /// results are iterated. Sql, + /// PRQL, which does not support DDL/DML in our implementation, + /// creates a lazy query object that only runs when the results + /// are iterated. Prql, + /// Execute Operations run a SQL operation directly when the + /// `Operation`'s `execute()` method runs. Execute, } @@ -229,10 +323,20 @@ impl Debug for Operation { } impl Operation { + /// Schema returns the schema of the results. This method returns + /// `None` before the query executes. pub fn schema(&self) -> Option> { self.schema.clone() } + /// Executes the query, according the semantics of the operation's + /// type. Returns an error if there was a problem parsing the + /// query or creating a stream. Operations created with + /// `execute()` run when this `execute()` method runs. For + /// operations with the `sql()` method, write operations and DDL + /// operations run before `execute()` returns. All other + /// operations are lazy and only execute when the results are + /// processed. pub async fn execute(&mut self) -> Result { match self.op { OperationType::Sql => { @@ -313,7 +417,13 @@ impl Operation { } } + /// Call returns the results of the query as a stream. No + /// processing happens until the stream is processed, and errors + /// parsing the query are returned as the the first result. pub fn call(&mut self) -> RecordStream { + // note the synchronous iterator in + // https://github.com/GlareDB/glaredb/pull/2848, provides a + // "native" way to write fully synchronous tests let mut op = self.clone(); RecordStream(Box::pin( futures::stream::once(async move { From d0e8a35e198b91c9a562383cb0573ab2dfe904d9 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Tue, 9 Apr 2024 10:46:47 -0400 Subject: [PATCH 27/34] chore: hoist nodejs onto rust sdk --- Cargo.lock | 10 +- bindings/nodejs/Cargo.toml | 12 +- bindings/nodejs/src/connect.rs | 60 ++++---- bindings/nodejs/src/connection.rs | 190 ++++-------------------- bindings/nodejs/src/error.rs | 2 + bindings/nodejs/src/execution.rs | 138 +++++++++++++++++ bindings/nodejs/src/execution_result.rs | 77 ---------- bindings/nodejs/src/lib.rs | 3 +- bindings/nodejs/src/logical_plan.rs | 75 ---------- crates/glaredb/src/lib.rs | 15 ++ 10 files changed, 229 insertions(+), 353 deletions(-) create mode 100644 bindings/nodejs/src/execution.rs delete mode 100644 bindings/nodejs/src/execution_result.rs delete mode 100644 bindings/nodejs/src/logical_plan.rs diff --git a/Cargo.lock b/Cargo.lock index c6447e306..35fce8850 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -539,6 +539,12 @@ dependencies = [ "event-listener 2.5.3", ] +[[package]] +name = "async-once-cell" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9338790e78aa95a416786ec8389546c4b6a1dfc3dc36071ed9518a9413a542eb" + [[package]] name = "async-openai" version = "0.19.1" @@ -2656,7 +2662,7 @@ dependencies = [ "errno", "fix-hidden-lifetime-bug", "futures", - "hashbrown 0.12.3", + "hashbrown 0.14.2", "indexmap 2.2.6", "itertools 0.12.0", "lazy_static", @@ -4028,11 +4034,13 @@ version = "0.0.0" dependencies = [ "anyhow", "arrow_util", + "async-once-cell", "async-trait", "bytes", "datafusion", "datafusion_ext", "futures", + "glaredb", "ioutil", "lzma-sys", 
"metastore", diff --git a/bindings/nodejs/Cargo.toml b/bindings/nodejs/Cargo.toml index a0446eef3..719ab2df3 100644 --- a/bindings/nodejs/Cargo.toml +++ b/bindings/nodejs/Cargo.toml @@ -10,14 +10,15 @@ crate-type = ["cdylib"] workspace = true [dependencies] +arrow_util = { path = "../../crates/arrow_util" } +datafusion_ext = { path = "../../crates/datafusion_ext" } +glaredb = { path = "../../crates/glaredb" } ioutil = { path = "../../crates/ioutil" } -sqlexec = { path = "../../crates/sqlexec" } metastore = { path = "../../crates/metastore" } -telemetry = { path = "../../crates/telemetry" } -pgsrv = { path = "../../crates/pgsrv" } pgrepr = { path = "../../crates/pgrepr" } -datafusion_ext = { path = "../../crates/datafusion_ext" } -arrow_util = { path = "../../crates/arrow_util" } +pgsrv = { path = "../../crates/pgsrv" } +sqlexec = { path = "../../crates/sqlexec" } +telemetry = { path = "../../crates/telemetry" } terminal_util = { path = "../../crates/terminal_util" } futures = { workspace = true } datafusion = { workspace = true } @@ -32,6 +33,7 @@ napi = { version = "2.16.1", default-features = false, features = ["full"] } napi-derive = "2.16.1" once_cell = "1.19.0" bytes = "1.6.0" +async-once-cell = "0.5.3" [build-dependencies] napi-build = "2.1.2" diff --git a/bindings/nodejs/src/connect.rs b/bindings/nodejs/src/connect.rs index d8c158915..75332326a 100644 --- a/bindings/nodejs/src/connect.rs +++ b/bindings/nodejs/src/connect.rs @@ -4,8 +4,12 @@ //! queries. use std::collections::HashMap; +use std::sync::Arc; + +use sqlexec::remote::client::RemoteClientType; use crate::connection::Connection; +use crate::error::JsGlareDbError; #[napi(object)] #[derive(Default)] @@ -17,44 +21,38 @@ pub struct ConnectOptions { pub storage_options: Option>, } -impl ConnectOptions { - fn spill_path(&mut self) -> Option { - self.spill_path.take() - } - - fn disable_tls(&self) -> bool { - self.disable_tls.unwrap_or(false) - } - - fn cloud_addr(&self) -> String { - self.cloud_addr - .clone() - .unwrap_or(String::from("https://console.glaredb.com")) - } - - fn location(&mut self) -> Option { - self.location.take() - } - - fn storage_options(&mut self) -> Option> { - self.storage_options.take() +impl TryFrom for glaredb::ConnectOptions { + type Error = glaredb::ConnectOptionsBuilderError; + + fn try_from( + val: ConnectOptions, + ) -> Result { + glaredb::ConnectOptionsBuilder::default() + .spill_path(val.spill_path) + .disable_tls_opt(val.disable_tls) + .cloud_addr_opt(val.cloud_addr) + .location(val.location) + .storage_options_opt(val.storage_options) + .client_type(RemoteClientType::Node) + .build() } } + /// Connect to a GlareDB database. 
#[napi(catch_unwind)] pub async fn connect( data_dir_or_cloud_url: Option, options: Option, ) -> napi::Result { - let mut options = options.unwrap_or_default(); - Connection::connect( - data_dir_or_cloud_url, - options.spill_path(), - options.disable_tls(), - options.cloud_addr(), - options.location(), - options.storage_options(), - ) - .await + let mut options: glaredb::ConnectOptions = options + .unwrap_or_default() + .try_into() + .map_err(JsGlareDbError::from)?; + + options.connection_target = data_dir_or_cloud_url; + + Ok(Connection { + inner: Arc::new(options.connect().await.map_err(JsGlareDbError::from)?), + }) } diff --git a/bindings/nodejs/src/connection.rs b/bindings/nodejs/src/connection.rs index 7867eda52..17a468e5e 100644 --- a/bindings/nodejs/src/connection.rs +++ b/bindings/nodejs/src/connection.rs @@ -1,17 +1,12 @@ -use std::collections::HashMap; -use std::path::PathBuf; use std::sync::Arc; -use datafusion::logical_expr::LogicalPlan as DFLogicalPlan; -use datafusion_ext::vars::SessionVars; +use async_once_cell::OnceCell; use futures::lock::Mutex; -use sqlexec::engine::{Engine, EngineBackend, SessionStorageConfig, TrackedSession}; -use sqlexec::remote::client::RemoteClientType; -use sqlexec::{LogicalPlan, OperationInfo}; -use url::Url; +use sqlexec::engine::TrackedSession; +use sqlexec::errors::ExecError; use crate::error::JsGlareDbError; -use crate::logical_plan::JsLogicalPlan; +use crate::execution::JsExecution; pub(super) type JsTrackedSession = Arc>; @@ -19,112 +14,28 @@ pub(super) type JsTrackedSession = Arc>; #[napi] #[derive(Clone)] pub struct Connection { - pub(crate) session: JsTrackedSession, - pub(crate) _engine: Arc, -} - -#[derive(Debug, Clone)] -struct JsSessionConf { - /// Where to store both metastore and user data. - data_dir: Option, - /// URL for cloud deployment to connect to. - cloud_url: Option, -} - -impl From> for JsSessionConf { - fn from(value: Option) -> Self { - match value { - Some(s) => match Url::parse(&s) { - Ok(u) => JsSessionConf { - data_dir: None, - cloud_url: Some(u), - }, - // Assume failing to parse a url just means the user provided a local path. - Err(_) => JsSessionConf { - data_dir: Some(PathBuf::from(s)), - cloud_url: None, - }, - }, - None => JsSessionConf { - data_dir: None, - cloud_url: None, - }, - } - } + pub(crate) inner: Arc, } #[napi] impl Connection { - pub(crate) async fn connect( - data_dir_or_cloud_url: Option, - spill_path: Option, - disable_tls: bool, - cloud_addr: String, - location: Option, - storage_options: Option>, - ) -> napi::Result { - let conf = JsSessionConf::from(data_dir_or_cloud_url); - - let backend = if let Some(location) = location.clone() { - EngineBackend::Remote { - location, - options: storage_options.unwrap_or_default(), - } - } else if let Some(data_dir) = conf.data_dir.clone() { - EngineBackend::Local(data_dir) - } else { - EngineBackend::Memory - }; - - let mut engine = Engine::from_backend(backend) - .await - .map_err(JsGlareDbError::from)?; - - engine = engine - .with_spill_path(spill_path.map(|p| p.into())) - .map_err(JsGlareDbError::from)?; - - let mut session = engine - .default_local_session_context() - .await - .map_err(JsGlareDbError::from)?; - - session - .create_client_session( - conf.cloud_url.clone(), - cloud_addr, - disable_tls, - RemoteClientType::Node, - None, - ) - .await - .map_err(JsGlareDbError::from)?; - - Ok(Connection { - session: Arc::new(Mutex::new(session)), - _engine: Arc::new(engine), - }) - } - /// Returns a default connection to an in-memory database. 
/// /// The database is only initialized once, and all subsequent calls will /// return the same connection. #[napi(catch_unwind)] pub async fn default_in_memory() -> napi::Result { - let engine = Engine::from_backend(EngineBackend::Memory) + static DEFAULT_CON: OnceCell = OnceCell::new(); + + Ok(DEFAULT_CON + .get_or_try_init(async { + Ok::<_, ExecError>(Connection { + inner: Arc::new(glaredb::ConnectOptions::new_in_memory().connect().await?), + }) + }) .await - .map_err(JsGlareDbError::from)?; - let sess = engine - .new_local_session_context(SessionVars::default(), SessionStorageConfig::default()) - .await - .map_err(JsGlareDbError::from)?; - let con = Connection { - session: Arc::new(Mutex::new(sess)), - _engine: Arc::new(engine), - }; - - Ok(con.clone()) + .map_err(JsGlareDbError::from)? + .clone()) } /// Run a SQL operation against a GlareDB database. @@ -165,38 +76,13 @@ impl Connection { /// await con.sql('create table my_table (a int)').then(cursor => cursor.execute()) /// ``` #[napi(catch_unwind)] - pub async fn sql(&self, query: String) -> napi::Result { - let cloned_sess = self.session.clone(); - let mut sess = self.session.lock().await; - - let plan = sess - .create_logical_plan(&query) + pub async fn sql(&self, query: String) -> napi::Result { + Ok(self + .inner + .sql(query) + .execute() .await - .map_err(JsGlareDbError::from)?; - - let op = OperationInfo::new().with_query_text(query); - - match plan - .to_owned() - .try_into_datafusion_plan() - .expect("resolving logical plan") - { - DFLogicalPlan::Extension(_) - | DFLogicalPlan::Dml(_) - | DFLogicalPlan::Ddl(_) - | DFLogicalPlan::Copy(_) => { - sess.execute_logical_plan(plan, &op) - .await - .map_err(JsGlareDbError::from)?; - - Ok(JsLogicalPlan::new( - LogicalPlan::Noop, - cloned_sess, - Default::default(), - )) - } - _ => Ok(JsLogicalPlan::new(plan, cloned_sess, op)), - } + .map_err(JsGlareDbError::from)?) } /// Run a PRQL query against a GlareDB database. Does not change @@ -213,17 +99,8 @@ impl Connection { /// All operations execute lazily when their results are /// processed. #[napi(catch_unwind)] - pub async fn prql(&self, query: String) -> napi::Result { - let cloned_sess = self.session.clone(); - let mut sess = self.session.lock().await; - let plan = sess - .prql_to_lp(&query) - .await - .map_err(JsGlareDbError::from)?; - - let op = OperationInfo::new().with_query_text(query); - - Ok(JsLogicalPlan::new(plan, cloned_sess, op)) + pub async fn prql(&self, query: String) -> napi::Result { + Ok(self.inner.prql(query).await.map_err(JsGlareDbError::from)?) } /// Execute a query. @@ -240,22 +117,11 @@ impl Connection { /// ``` #[napi(catch_unwind)] pub async fn execute(&self, query: String) -> napi::Result<()> { - let sess = self.session.clone(); - let mut sess = sess.lock().await; - - let plan = sess - .create_logical_plan(&query) - .await - .map_err(JsGlareDbError::from)?; - - let op = OperationInfo::new().with_query_text(query); - - let _ = sess - .execute_logical_plan(plan, &op) - .await - .map_err(JsGlareDbError::from)?; - - Ok(()) + Ok(self + .inner + .execute(query) + .execute() + .map_err(JsGlareDbError::from)?) } /// Close the current session. 
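With this change the bindings delegate to the `glaredb` crate's `ConnectOptions` and `Operation` types instead of driving `sqlexec` sessions directly. For orientation, a minimal sketch of the same flow in pure Rust, assuming the SDK API as it stands at this point in the series (tokio and futures are assumed for the runtime and stream combinators; the error plumbing is simplified):

    use futures::StreamExt;

    #[tokio::main]
    async fn main() -> Result<(), Box<dyn std::error::Error>> {
        // In-memory connection; no files or network are touched.
        let conn = glaredb::ConnectOptions::new_in_memory().connect().await?;

        // sql() is lazy: the query is planned when execute() runs, and
        // rows are produced only as the stream is consumed.
        let mut op = conn.sql("SELECT 1 AS a");
        let mut stream = op.execute().await?;
        while let Some(batch) = stream.next().await {
            println!("rows: {}", batch?.num_rows());
        }
        Ok(())
    }
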
diff --git a/bindings/nodejs/src/error.rs b/bindings/nodejs/src/error.rs index 1b97a1759..8ee2f904e 100644 --- a/bindings/nodejs/src/error.rs +++ b/bindings/nodejs/src/error.rs @@ -18,6 +18,8 @@ pub enum JsGlareDbError { Anyhow(#[from] anyhow::Error), #[error(transparent)] DataFusion(#[from] datafusion::error::DataFusionError), + #[error(transparent)] + ConnectionConfiguration(#[from] glaredb::ConnectOptionsBuilderError), #[error("{0}")] Other(String), } diff --git a/bindings/nodejs/src/execution.rs b/bindings/nodejs/src/execution.rs new file mode 100644 index 000000000..4cede1650 --- /dev/null +++ b/bindings/nodejs/src/execution.rs @@ -0,0 +1,138 @@ +use std::sync::{Arc, Mutex}; + +use arrow_util::pretty; +use futures::stream::StreamExt; +use glaredb::RecordBatch; +use sqlexec::session::ExecutionResult; + +use crate::error::JsGlareDbError; + +#[napi] +#[derive(Clone, Debug)] +pub struct JsExecution { + op: Arc>, +} + +impl From for JsExecution { + fn from(opt: glaredb::Operation) -> Self { + Self { + op: Arc::new(Mutex::new(opt)), + } + } +} + +impl JsExecution { + pub(crate) async fn legacy_execute(&mut self) -> napi::Result<()> { + match &mut self.0 { + ExecutionResult::Query { stream, .. } => { + while let Some(r) = stream.next().await { + let _ = r.map_err(JsGlareDbError::from)?; + } + Ok(()) + } + _ => Ok(()), + } + } + + pub(crate) async fn legacy_to_arrow_inner(&mut self) -> napi::Result> { + let mut stream = self.op.lock().unwrap().execute().await?; + + let mut data_batch = vec![]; + let cursor = std::io::Cursor::new(&mut data_batch); + let mut writer = + FileWriter::try_new(cursor, stream.schema().as_ref()).map_err(JsGlareDbError::from)?; + + while let Some(batch) = stream.next().await { + let batch = batch.map_err(JsGlareDbError::from)?; + writer.write(&batch).map_err(JsGlareDbError::from)?; + } + + writer.finish().map_err(JsGlareDbError::from)?; + drop(writer); + + Ok(data_batch) + } + + pub(crate) async fn legacy_show(&mut self) -> napi::Result<()> { + print_batch(&mut self.0).await?; + Ok(()) + } +} + +#[napi] +impl JsExecution { + #[napi(catch_unwind)] + pub fn to_string(&self) -> napi::Result { + Ok(format!("{:?}", self.op.lock().unwrap())) + } + + #[napi(catch_unwind)] + pub async fn show(&self) -> napi::Result<()> { + let _res = self + .op + .lock() + .unwrap() + .execute() + .await + .map_err(JsGlareDbError::from)?; + Ok(()) + } + + #[napi(catch_unwind)] + pub async fn execute(&self) -> napi::Result<()> { + self.execute_inner().await?.execute().await?; + Ok(()) + } + + #[napi(catch_unwind)] + pub async fn to_ipc(&self) -> napi::Result { + let inner = self.execute_inner().await?.to_arrow_inner().await?; + Ok(inner.into()) + } + + #[napi(ts_return_type = "pl.DataFrame")] + /// Convert to a Polars DataFrame. + /// "nodejs-polars" must be installed as a peer dependency. + /// See https://www.npmjs.com/package/nodejs-polars + pub async fn to_polars(&self) -> napi::Result<()> { + // TODO: implement this in rust if possible? + // Currently, this is monkeypatched in glaredb.js + unimplemented!("to_polars") + } + + #[napi(ts_return_type = "arrow.Table")] + /// Convert to an "apache-arrow" Table. + /// "apache-arrow" must be installed as a peer dependency. + /// See https://www.npmjs.com/package/apache-arrow + pub async fn to_arrow(&self) -> napi::Result<()> { + // TODO: implement this in rust if possible? 
+ // Currently, this is monkeypatched in glaredb.js + unimplemented!("to_arrow") + } +} + +async fn print_batch(result: &mut ExecutionResult) -> napi::Result<()> { + match result { + ExecutionResult::Query { stream, .. } => { + let schema = stream.schema(); + let batches = stream + .collect::>() + .await + .into_iter() + .collect::, _>>() + .map_err(JsGlareDbError::from)?; + + let disp = pretty::pretty_format_batches( + &schema, + &batches, + Some(terminal_util::term_width()), + None, + ) + .map_err(|e| napi::Error::from_reason(e.to_string()))?; + + println!("{}", disp); + Ok(()) + } + _ => Err(napi::Error::from_reason("Not able to show executed result")), + } +} diff --git a/bindings/nodejs/src/execution_result.rs b/bindings/nodejs/src/execution_result.rs deleted file mode 100644 index 57bef61ec..000000000 --- a/bindings/nodejs/src/execution_result.rs +++ /dev/null @@ -1,77 +0,0 @@ -use arrow_util::pretty; -use datafusion::arrow::ipc::writer::FileWriter; -use datafusion::arrow::record_batch::RecordBatch; -use futures::StreamExt; -use sqlexec::session::ExecutionResult; - -use crate::error::JsGlareDbError; - -pub(crate) struct JsExecutionResult(pub(crate) ExecutionResult); - -impl JsExecutionResult { - pub(crate) async fn execute(&mut self) -> napi::Result<()> { - match &mut self.0 { - ExecutionResult::Query { stream, .. } => { - while let Some(r) = stream.next().await { - let _ = r.map_err(JsGlareDbError::from)?; - } - Ok(()) - } - _ => Ok(()), - } - } - - pub(crate) async fn to_arrow_inner(&mut self) -> napi::Result> { - let res = match &mut self.0 { - ExecutionResult::Query { stream, .. } => { - let mut data_batch = vec![]; - let cursor = std::io::Cursor::new(&mut data_batch); - let mut writer = FileWriter::try_new(cursor, stream.schema().as_ref()) - .map_err(JsGlareDbError::from)?; - - while let Some(batch) = stream.next().await { - let batch = batch.map_err(JsGlareDbError::from)?; - writer.write(&batch).map_err(JsGlareDbError::from)?; - } - - writer.finish().map_err(JsGlareDbError::from)?; - drop(writer); - - data_batch - } - _ => vec![], - }; - Ok(res) - } - - pub(crate) async fn show(&mut self) -> napi::Result<()> { - print_batch(&mut self.0).await?; - Ok(()) - } -} - -async fn print_batch(result: &mut ExecutionResult) -> napi::Result<()> { - match result { - ExecutionResult::Query { stream, .. 
} => { - let schema = stream.schema(); - let batches = stream - .collect::>() - .await - .into_iter() - .collect::, _>>() - .map_err(JsGlareDbError::from)?; - - let disp = pretty::pretty_format_batches( - &schema, - &batches, - Some(terminal_util::term_width()), - None, - ) - .map_err(|e| napi::Error::from_reason(e.to_string()))?; - - println!("{}", disp); - Ok(()) - } - _ => Err(napi::Error::from_reason("Not able to show executed result")), - } -} diff --git a/bindings/nodejs/src/lib.rs b/bindings/nodejs/src/lib.rs index 95fefeaec..52b1e49dc 100644 --- a/bindings/nodejs/src/lib.rs +++ b/bindings/nodejs/src/lib.rs @@ -2,7 +2,6 @@ pub mod connect; pub mod connection; pub mod error; -pub mod execution_result; -pub mod logical_plan; +pub mod execution; #[macro_use] extern crate napi_derive; diff --git a/bindings/nodejs/src/logical_plan.rs b/bindings/nodejs/src/logical_plan.rs deleted file mode 100644 index 419f1fb60..000000000 --- a/bindings/nodejs/src/logical_plan.rs +++ /dev/null @@ -1,75 +0,0 @@ -use sqlexec::{LogicalPlan, OperationInfo}; - -use crate::connection::JsTrackedSession; -use crate::error::JsGlareDbError; -use crate::execution_result::JsExecutionResult; - -#[napi] -#[derive(Clone, Debug)] -pub struct JsLogicalPlan { - pub(crate) lp: LogicalPlan, - pub(crate) session: JsTrackedSession, - pub(crate) op: OperationInfo, -} - -impl JsLogicalPlan { - pub(super) fn new(lp: LogicalPlan, session: JsTrackedSession, op: OperationInfo) -> Self { - Self { lp, session, op } - } - - async fn execute_inner(&self) -> napi::Result { - let mut sess = self.session.lock().await; - let (_, stream) = sess - .execute_logical_plan(self.lp.clone(), &self.op) - .await - .map_err(JsGlareDbError::from)?; - - Ok(JsExecutionResult(stream)) - } -} - -#[napi] -impl JsLogicalPlan { - #[napi(catch_unwind)] - pub fn to_string(&self) -> napi::Result { - Ok(format!("{:?}", self.lp)) - } - - #[napi(catch_unwind)] - pub async fn show(&self) -> napi::Result<()> { - self.execute_inner().await?.show().await?; - Ok(()) - } - - #[napi(catch_unwind)] - pub async fn execute(&self) -> napi::Result<()> { - self.execute_inner().await?.execute().await?; - Ok(()) - } - - #[napi(catch_unwind)] - pub async fn to_ipc(&self) -> napi::Result { - let inner = self.execute_inner().await?.to_arrow_inner().await?; - Ok(inner.into()) - } - - #[napi(ts_return_type = "pl.DataFrame")] - /// Convert to a Polars DataFrame. - /// "nodejs-polars" must be installed as a peer dependency. - /// See https://www.npmjs.com/package/nodejs-polars - pub async fn to_polars(&self) -> napi::Result<()> { - // TODO: implement this in rust if possible? - // Currently, this is monkeypatched in glaredb.js - unimplemented!("to_polars") - } - - #[napi(ts_return_type = "arrow.Table")] - /// Convert to an "apache-arrow" Table. - /// "apache-arrow" must be installed as a peer dependency. - /// See https://www.npmjs.com/package/apache-arrow - pub async fn to_arrow(&self) -> napi::Result<()> { - // TODO: implement this in rust if possible? 
- // Currently, this is monkeypatched in glaredb.js - unimplemented!("to_arrow") - } -} diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index a426cf372..24b1aa348 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -124,6 +124,21 @@ impl ConnectOptionsBuilder { self.storage_options = opts; self } + + pub fn cloud_addr_opt(&mut self, v: Option) -> &mut Self { + self.cloud_addr = Some(v); + self + } + + pub fn disable_tls_opt(&mut self, v: Option) -> &mut Self { + self.disable_tls = Some(v); + self + } + + pub fn storage_options_opt(&mut self, v: Option>) -> &mut Self { + self.storage_options = v; + self + } } impl ConnectOptions { From 7a1d83b2323aaa5f0efbeb642a99e105e6acd33a Mon Sep 17 00:00:00 2001 From: tycho garen Date: Thu, 11 Apr 2024 17:43:25 -0400 Subject: [PATCH 28/34] chore: clean up integration --- bindings/nodejs/src/connection.rs | 37 +++++----- bindings/nodejs/src/execution.rs | 112 ++++++++++++------------------ bindings/nodejs/src/lib.rs | 0 crates/glaredb/src/lib.rs | 2 +- 4 files changed, 64 insertions(+), 87 deletions(-) mode change 100644 => 100755 bindings/nodejs/src/lib.rs diff --git a/bindings/nodejs/src/connection.rs b/bindings/nodejs/src/connection.rs index fb4097c0d..d4541b4c7 100644 --- a/bindings/nodejs/src/connection.rs +++ b/bindings/nodejs/src/connection.rs @@ -1,15 +1,10 @@ use std::sync::Arc; use async_once_cell::OnceCell; -use futures::lock::Mutex; -use sqlexec::engine::TrackedSession; -use sqlexec::errors::ExecError; use crate::error::JsGlareDbError; use crate::execution::JsExecution; -pub(super) type JsTrackedSession = Arc>; - /// A connected session to a GlareDB database. #[napi] #[derive(Clone)] @@ -30,12 +25,16 @@ impl Connection { Ok(DEFAULT_CON .get_or_try_init(async { - Ok::<_, ExecError>(Connection { - inner: Arc::new(glaredb::ConnectOptions::new_in_memory().connect().await?), + Ok::<_, JsGlareDbError>(Connection { + inner: Arc::new( + glaredb::ConnectOptionsBuilder::new_in_memory() + .build()? + .connect() + .await?, + ), }) }) - .await - .map_err(JsGlareDbError::from)? + .await? .clone()) } @@ -78,12 +77,9 @@ impl Connection { /// ``` #[napi(catch_unwind)] pub async fn sql(&self, query: String) -> napi::Result { - Ok(self - .inner - .sql(query) - .execute() - .await - .map_err(JsGlareDbError::from)?) + let mut op = self.inner.sql(query); + op.execute().await.map_err(JsGlareDbError::from)?; + Ok(op.into()) } /// Run a PRQL query against a GlareDB database. Does not change @@ -101,7 +97,9 @@ impl Connection { /// processed. #[napi(catch_unwind)] pub async fn prql(&self, query: String) -> napi::Result { - Ok(self.inner.prql(query).await.map_err(JsGlareDbError::from)?) + let mut op = self.inner.prql(query); + op.execute().await.map_err(JsGlareDbError::from)?; + Ok(op.into()) } /// Execute a query. @@ -118,11 +116,12 @@ impl Connection { /// ``` #[napi(catch_unwind)] pub async fn execute(&self, query: String) -> napi::Result<()> { - Ok(self - .inner + self.inner .execute(query) .execute() - .map_err(JsGlareDbError::from)?) + .await + .map_err(JsGlareDbError::from)?; + Ok(()) } /// Close the current session. 
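The `default_in_memory` binding above memoizes one shared connection in an `async_once_cell::OnceCell`. A stripped-down sketch of that initialization pattern, with a plain `String` standing in for the `Connection` (the names here are illustrative, not part of the bindings):

    use std::sync::Arc;

    use async_once_cell::OnceCell;

    // Process-wide cell: the initializing future runs at most once,
    // even when several callers race on first access.
    static DEFAULT: OnceCell<Arc<String>> = OnceCell::new();

    async fn default_value() -> Result<Arc<String>, std::io::Error> {
        Ok(DEFAULT
            // get_or_try_init takes the initializing future directly.
            .get_or_try_init(async { Ok::<_, std::io::Error>(Arc::new(String::from("state"))) })
            .await?
            .clone())
    }
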
diff --git a/bindings/nodejs/src/execution.rs b/bindings/nodejs/src/execution.rs index 4cede1650..18303663c 100644 --- a/bindings/nodejs/src/execution.rs +++ b/bindings/nodejs/src/execution.rs @@ -1,9 +1,9 @@ use std::sync::{Arc, Mutex}; use arrow_util::pretty; +use datafusion::arrow::ipc::writer::FileWriter; use futures::stream::StreamExt; -use glaredb::RecordBatch; -use sqlexec::session::ExecutionResult; +use glaredb::{RecordBatch, SendableRecordBatchStream}; use crate::error::JsGlareDbError; @@ -22,40 +22,24 @@ impl From for JsExecution { } impl JsExecution { - pub(crate) async fn legacy_execute(&mut self) -> napi::Result<()> { - match &mut self.0 { - ExecutionResult::Query { stream, .. } => { - while let Some(r) = stream.next().await { - let _ = r.map_err(JsGlareDbError::from)?; - } - Ok(()) + pub(crate) async fn to_arrow_inner(&self) -> napi::Result> { + let mut op = self.op.lock().unwrap().clone(); + Ok(async move { + let mut stream = op.execute().await?; + let mut data_batch = Vec::new(); + let cursor = std::io::Cursor::new(&mut data_batch); + let mut writer = FileWriter::try_new(cursor, stream.schema().as_ref())?; + + while let Some(batch) = stream.next().await { + writer.write(&batch?)?; } - _ => Ok(()), - } - } - pub(crate) async fn legacy_to_arrow_inner(&mut self) -> napi::Result> { - let mut stream = self.op.lock().unwrap().execute().await?; + writer.finish()?; + drop(writer); - let mut data_batch = vec![]; - let cursor = std::io::Cursor::new(&mut data_batch); - let mut writer = - FileWriter::try_new(cursor, stream.schema().as_ref()).map_err(JsGlareDbError::from)?; - - while let Some(batch) = stream.next().await { - let batch = batch.map_err(JsGlareDbError::from)?; - writer.write(&batch).map_err(JsGlareDbError::from)?; + Ok::, JsGlareDbError>(data_batch) } - - writer.finish().map_err(JsGlareDbError::from)?; - drop(writer); - - Ok(data_batch) - } - - pub(crate) async fn legacy_show(&mut self) -> napi::Result<()> { - print_batch(&mut self.0).await?; - Ok(()) + .await?) } } @@ -68,25 +52,30 @@ impl JsExecution { #[napi(catch_unwind)] pub async fn show(&self) -> napi::Result<()> { - let _res = self - .op - .lock() - .unwrap() - .execute() - .await - .map_err(JsGlareDbError::from)?; - Ok(()) + let mut op = self.op.lock().unwrap().clone(); + Ok(async move { + let stream = op.execute().await?; + print_record_batches(stream).await + } + .await?) } #[napi(catch_unwind)] pub async fn execute(&self) -> napi::Result<()> { - self.execute_inner().await?.execute().await?; - Ok(()) + let mut op = self.op.lock().unwrap().clone(); + Ok(async move { + let mut stream = op.call(); + while let Some(r) = stream.next().await { + let _ = r?; + } + Ok::<_, JsGlareDbError>(()) + } + .await?) } #[napi(catch_unwind)] pub async fn to_ipc(&self) -> napi::Result { - let inner = self.execute_inner().await?.to_arrow_inner().await?; + let inner = self.to_arrow_inner().await?; Ok(inner.into()) } @@ -111,28 +100,17 @@ impl JsExecution { } } -async fn print_batch(result: &mut ExecutionResult) -> napi::Result<()> { - match result { - ExecutionResult::Query { stream, .. 
} => { - let schema = stream.schema(); - let batches = stream - .collect::>() - .await - .into_iter() - .collect::, _>>() - .map_err(JsGlareDbError::from)?; - - let disp = pretty::pretty_format_batches( - &schema, - &batches, - Some(terminal_util::term_width()), - None, - ) - .map_err(|e| napi::Error::from_reason(e.to_string()))?; - - println!("{}", disp); - Ok(()) - } - _ => Err(napi::Error::from_reason("Not able to show executed result")), - } +async fn print_record_batches(stream: SendableRecordBatchStream) -> Result<(), JsGlareDbError> { + let schema = stream.schema(); + let batches = stream + .collect::>() + .await + .into_iter() + .collect::, _>>()?; + + let disp = + pretty::pretty_format_batches(&schema, &batches, Some(terminal_util::term_width()), None)?; + + println!("{}", disp); + Ok(()) } diff --git a/bindings/nodejs/src/lib.rs b/bindings/nodejs/src/lib.rs old mode 100644 new mode 100755 diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 76c2039fc..100c913a4 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -28,7 +28,7 @@ pub use datafusion::physical_plan::SendableRecordBatchStream; use derive_builder::Builder; use futures::lock::Mutex; use futures::stream::{Stream, StreamExt}; -use sqlexec::engine::{Engine, EngineBackend, TrackedSession}; +use sqlexec::engine::{Engine, EngineStorage, TrackedSession}; pub use sqlexec::environment::EnvironmentReader; use sqlexec::errors::ExecError; use sqlexec::remote::client::RemoteClientType; From 943af802f22c868bf3f27eebbe2e9eb01ea4e9fc Mon Sep 17 00:00:00 2001 From: tycho garen Date: Thu, 11 Apr 2024 19:45:40 -0400 Subject: [PATCH 29/34] build update --- bindings/nodejs/index.d.ts | 16 ++++++++-------- bindings/nodejs/index.js | 4 ++-- bindings/nodejs/src/connection.rs | 4 +--- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/bindings/nodejs/index.d.ts b/bindings/nodejs/index.d.ts index 912163c94..063a462c1 100644 --- a/bindings/nodejs/index.d.ts +++ b/bindings/nodejs/index.d.ts @@ -15,11 +15,11 @@ export function connect(dataDirOrCloudUrl?: string | undefined | null, options?: /** A connected session to a GlareDB database. */ export class Connection { /** - * Returns a default connection to the global in-memory database. + * Returns the default connection to a global in-memory database. * - * The database is only initialized once, and all subsequent calls - * will return the same connection, and therefore have access to the - * same data and database. + * The database is only initialized once, and all subsequent + * calls will return the same connection object and therefore + * have access to the same data. */ static defaultInMemory(): Promise /** @@ -61,7 +61,7 @@ export class Connection { * await con.sql('create table my_table (a int)').then(cursor => cursor.execute()) * ``` */ - sql(query: string): Promise + sql(query: string): Promise /** * Run a PRQL query against a GlareDB database. Does not change * the state or dialect of the connection object. @@ -70,14 +70,14 @@ export class Connection { * import glaredb from "@glaredb/glaredb" * * let con = glaredb.connect() - * let cursor = await con.sql('from my_table | take 1'); + * let cursor = await con.prql('from my_table | take 1'); * await cursor.show() * ``` * * All operations execute lazily when their results are * processed. */ - prql(query: string): Promise + prql(query: string): Promise /** * Execute a query. * @@ -96,7 +96,7 @@ export class Connection { /** Close the current session. 
*/ close(): Promise } -export class JsLogicalPlan { +export class JsExecution { toString(): string show(): Promise execute(): Promise diff --git a/bindings/nodejs/index.js b/bindings/nodejs/index.js index 1f7eb412d..66fa9f103 100644 --- a/bindings/nodejs/index.js +++ b/bindings/nodejs/index.js @@ -252,8 +252,8 @@ if (!nativeBinding) { throw new Error(`Failed to load native binding`) } -const { connect, Connection, JsLogicalPlan } = nativeBinding +const { connect, Connection, JsExecution } = nativeBinding module.exports.connect = connect module.exports.Connection = Connection -module.exports.JsLogicalPlan = JsLogicalPlan +module.exports.JsExecution = JsExecution diff --git a/bindings/nodejs/src/connection.rs b/bindings/nodejs/src/connection.rs index d4541b4c7..9408e7d24 100644 --- a/bindings/nodejs/src/connection.rs +++ b/bindings/nodejs/src/connection.rs @@ -97,9 +97,7 @@ impl Connection { /// processed. #[napi(catch_unwind)] pub async fn prql(&self, query: String) -> napi::Result { - let mut op = self.inner.prql(query); - op.execute().await.map_err(JsGlareDbError::from)?; - Ok(op.into()) + Ok(self.inner.prql(query).into()) } /// Execute a query. From 06faa49de57daa1c96af05622c724a618d82af6f Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 12 Apr 2024 13:27:33 -0400 Subject: [PATCH 30/34] fix: lazy sdk implementation --- crates/glaredb/src/lib.rs | 176 +++++++++++++++++++++++++++++++++----- 1 file changed, 156 insertions(+), 20 deletions(-) diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index dd532fb64..4d12f7b4a 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -226,6 +226,7 @@ impl Connection { query: query.into(), conn: Arc::new(self.clone()), schema: None, + plan: None, } } @@ -239,6 +240,7 @@ impl Connection { query: query.into(), conn: Arc::new(self.clone()), schema: None, + plan: None, } } @@ -252,6 +254,7 @@ impl Connection { query: query.into(), conn: Arc::new(self.clone()), schema: None, + plan: None, } } } @@ -293,28 +296,56 @@ impl From> for RecordStream { } } +impl RecordStream { + // Collects all of the record batches in a stream, aborting if + // there are any errors. + pub async fn all(&mut self) -> Result, DataFusionError> { + let mut out = Vec::new(); + let stream = &mut self.0; + + while let Some(b) = stream.next().await { + out.push(b?); + } + + Ok(out) + } + + // Iterates through the stream, propagating any errors, + // but discarding all of the data. + pub async fn check(&mut self) -> Result<(), DataFusionError> { + let stream = &mut self.0; + + while let Some(b) = stream.next().await { + b?; + } + + Ok(()) + } +} + #[derive(Debug, Clone)] enum OperationType { - /// SQL operations create a lazy operation that runs DDL/DML - /// operations directly, and executes other queries when the - /// results are iterated. + /// SQL operations create an operation that runs DDL/DML operations + /// directly, and executes other queries lazily when the results + /// are iterated. Sql, /// PRQL, which does not support DDL/DML in our implementation, /// creates a lazy query object that only runs when the results /// are iterated. Prql, /// Execute Operations run a SQL operation directly when the - /// `Operation`'s `execute()` method runs. + /// `Operation`'s `evaluate()` or `resolve()` methods run. 
Execute, } #[derive(Debug, Clone)] -#[must_use = "operations do nothing unless call() or execute() run"] +#[must_use = "operations do nothing unless evaluate() or resolve() run"] pub struct Operation { op: OperationType, query: String, conn: Arc, schema: Option>, + plan: Option, } impl ToString for Operation { @@ -330,23 +361,115 @@ impl Operation { self.schema.clone() } - /// Executes the query, according to the semantics of the operation's - /// type. Returns an error if there was a problem parsing the - /// query or creating a stream. Operations created with - /// `execute()` run when this `execute()` method runs. For - /// operations with the `sql()` method, write operations and DDL - /// operations run before `execute()` returns. All other - /// operations are lazy and only execute when the results are - /// processed. - pub async fn execute(&mut self) -> Result { + /// Evaluate constructs a plan for the query, and in the case of + /// all `OperationType::Execute` operations and + /// `OperationType::Sql` operations that write data, the operation + /// runs immediately. All other operations run when `.resolve()` is + /// called. + pub async fn evaluate(&mut self) -> Result { match self.op { OperationType::Sql => { let mut ses = self.conn.session.lock().await; + let plan = ses.create_logical_plan(&self.query).await?; - let op = OperationInfo::new().with_query_text(self.query.clone()); - let schema = Arc::new(plan.output_schema().unwrap_or_else(Schema::empty)); + + self.schema + .replace(Arc::new(plan.output_schema().unwrap_or_else(Schema::empty))); + + match plan.to_owned().try_into_datafusion_plan()? { + LogicalPlan::Dml(_) + | LogicalPlan::Ddl(_) + | LogicalPlan::Copy(_) + | LogicalPlan::Extension(_) => { + RecordStream::from(Self::process_result( + ses.execute_logical_plan( + plan, + &OperationInfo::new().with_query_text(self.query.clone()), + ) + .await? + .1, + )) + .check() + .await?; + } + _ => { + self.plan.replace(plan); + } + }; + + Ok(self.clone()) + } + OperationType::Prql => { + let plan = self + .conn + .session + .lock() + .await + .prql_to_lp(&self.query) + .await?; + + self.schema + .replace(Arc::new(plan.output_schema().unwrap_or_else(Schema::empty))); + + self.plan.replace(plan); + + Ok(self.clone()) + } + OperationType::Execute => { + let mut ses = self.conn.session.lock().await; + let plan = ses.create_logical_plan(&self.query).await?; + + self.schema + .replace(Arc::new(plan.output_schema().unwrap_or_else(Schema::empty))); + + RecordStream::from(Self::process_result( + ses.execute_logical_plan( + plan, + &OperationInfo::new().with_query_text(self.query.clone()), + ) + .await? + .1, + )) + .check() + .await?; + + Ok(self.clone()) + } + } + } + + /// Resolves the results of the query, according to the semantics + /// of the operation's type. Uses the plan built during + /// `evaluate()` if populated, but will re-plan on subsequent + /// calls or when evaluate isn't called first. Returns an error if + /// there is a problem parsing the query or creating a + /// stream. Operations created with `execute()` run when the + /// `resolve()` method runs. For operations with the `sql()` + /// method, write operations and DDL operations run before + /// `resolve()` returns. All other operations are lazy and only + /// execute as the results are processed. 
+ pub async fn resolve(&mut self) -> Result { + match self.op { + OperationType::Sql => { + let mut ses = self.conn.session.lock().await; + + let plan = if self.plan.is_some() { + self.plan.take().unwrap() + } else { + self.schema = None; + ses.prql_to_lp(&self.query).await? + }; + + let schema = if self.schema.is_some() { + self.schema.clone().unwrap() + } else { + self.schema + .insert(Arc::new(plan.output_schema().unwrap_or_else(Schema::empty))) + .to_owned() + }; self.schema.replace(schema.clone()); + let op = OperationInfo::new().with_query_text(self.query.clone()); match plan.to_owned().try_into_datafusion_plan()? { LogicalPlan::Dml(_) | LogicalPlan::Ddl(_) @@ -375,11 +498,24 @@ impl Operation { } OperationType::Prql => { let mut ses = self.conn.session.lock().await; - let plan = ses.prql_to_lp(&self.query).await?; - let op = OperationInfo::new().with_query_text(self.query.clone()); - let schema = Arc::new(plan.output_schema().unwrap_or_else(Schema::empty)); + + let plan = if self.plan.is_some() { + self.plan.take().unwrap() + } else { + self.schema = None; + ses.prql_to_lp(&self.query).await? + }; + + let schema = if self.schema.is_some() { + self.schema.clone().unwrap() + } else { + self.schema + .insert(Arc::new(plan.output_schema().unwrap_or_else(Schema::empty))) + .to_owned() + }; let ses_clone = self.conn.session.clone(); + let op = OperationInfo::new().with_query_text(self.query.clone()); Ok(Self::process_result(ExecutionResult::Query { stream: Box::pin(RecordBatchStreamAdapter::new( schema.clone(), @@ -413,7 +549,7 @@ impl Operation { let mut op = self.clone(); RecordStream(Box::pin( futures::stream::once(async move { - match op.execute().await { + match op.resolve().await { Err(err) => Self::handle_error(err), Ok(stream) => stream, } From d0e696cab82d63b23264854db76f65d775a448cd Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 12 Apr 2024 15:43:58 -0400 Subject: [PATCH 31/34] fix: cleanup calling --- bindings/nodejs/src/connection.rs | 22 +++++++++++++++++----- bindings/nodejs/src/execution.rs | 29 +++++++---------------------- crates/glaredb/src/lib.rs | 2 +- 3 files changed, 25 insertions(+), 28 deletions(-) diff --git a/bindings/nodejs/src/connection.rs b/bindings/nodejs/src/connection.rs index 9408e7d24..991df57f0 100644 --- a/bindings/nodejs/src/connection.rs +++ b/bindings/nodejs/src/connection.rs @@ -77,9 +77,13 @@ impl Connection { /// ``` #[napi(catch_unwind)] pub async fn sql(&self, query: String) -> napi::Result { - let mut op = self.inner.sql(query); - op.execute().await.map_err(JsGlareDbError::from)?; - Ok(op.into()) + Ok(self + .inner + .sql(query) + .evaluate() + .await + .map_err(JsGlareDbError::from)? + .into()) } /// Run a PRQL query against a GlareDB database. Does not change @@ -97,7 +101,13 @@ impl Connection { /// processed. #[napi(catch_unwind)] pub async fn prql(&self, query: String) -> napi::Result { - Ok(self.inner.prql(query).into()) + Ok(self + .inner + .prql(query) + .evaluate() + .await + .map_err(JsGlareDbError::from)? + .into()) } /// Execute a query. 
@@ -116,9 +126,11 @@ impl Connection { pub async fn execute(&self, query: String) -> napi::Result<()> { self.inner .execute(query) - .execute() + .call() + .check() .await .map_err(JsGlareDbError::from)?; + Ok(()) } diff --git a/bindings/nodejs/src/execution.rs b/bindings/nodejs/src/execution.rs index 18303663c..56d5dee16 100644 --- a/bindings/nodejs/src/execution.rs +++ b/bindings/nodejs/src/execution.rs @@ -3,7 +3,7 @@ use std::sync::{Arc, Mutex}; use arrow_util::pretty; use datafusion::arrow::ipc::writer::FileWriter; use futures::stream::StreamExt; -use glaredb::{RecordBatch, SendableRecordBatchStream}; +use glaredb::{RecordStream, SendableRecordBatchStream}; use crate::error::JsGlareDbError; @@ -25,7 +25,7 @@ impl JsExecution { pub(crate) async fn to_arrow_inner(&self) -> napi::Result> { let mut op = self.op.lock().unwrap().clone(); Ok(async move { - let mut stream = op.execute().await?; + let mut stream = op.resolve().await?; let mut data_batch = Vec::new(); let cursor = std::io::Cursor::new(&mut data_batch); let mut writer = FileWriter::try_new(cursor, stream.schema().as_ref())?; @@ -53,30 +53,18 @@ impl JsExecution { #[napi(catch_unwind)] pub async fn show(&self) -> napi::Result<()> { let mut op = self.op.lock().unwrap().clone(); - Ok(async move { - let stream = op.execute().await?; - print_record_batches(stream).await - } - .await?) + Ok(async move { print_record_batches(op.resolve().await?).await }.await?) } #[napi(catch_unwind)] pub async fn execute(&self) -> napi::Result<()> { let mut op = self.op.lock().unwrap().clone(); - Ok(async move { - let mut stream = op.call(); - while let Some(r) = stream.next().await { - let _ = r?; - } - Ok::<_, JsGlareDbError>(()) - } - .await?) + Ok(async move { Ok::<_, JsGlareDbError>(op.call().check().await?) }.await?) } #[napi(catch_unwind)] pub async fn to_ipc(&self) -> napi::Result { - let inner = self.to_arrow_inner().await?; - Ok(inner.into()) + Ok(self.to_arrow_inner().await?.into()) } #[napi(ts_return_type = "pl.DataFrame")] @@ -102,11 +90,8 @@ impl JsExecution { async fn print_record_batches(stream: SendableRecordBatchStream) -> Result<(), JsGlareDbError> { let schema = stream.schema(); - let batches = stream - .collect::>() - .await - .into_iter() - .collect::, _>>()?; + let mut stream: RecordStream = stream.into(); + let batches = stream.to_vec().await?; let disp = pretty::pretty_format_batches(&schema, &batches, Some(terminal_util::term_width()), None)?; diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 4492204ae..627a8df35 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -314,7 +314,7 @@ impl From> for RecordStream { impl RecordStream { // Collects all of the record batches in a stream, aborting if // there are any errors. 
- pub async fn all(&mut self) -> Result, DataFusionError> { + pub async fn to_vec(&mut self) -> Result, DataFusionError> { let mut out = Vec::new(); let stream = &mut self.0; From 0ecc8834666e9d215b2caca339e9f20c08b27e97 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 12 Apr 2024 15:44:53 -0400 Subject: [PATCH 32/34] backport method naming --- crates/glaredb/src/lib.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 4d12f7b4a..627a8df35 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -29,7 +29,7 @@ use derive_builder::Builder; use futures::lock::Mutex; use futures::stream::{Stream, StreamExt}; use sqlexec::engine::{Engine, EngineStorage, TrackedSession}; -use sqlexec::environment::EnvironmentReader; +pub use sqlexec::environment::EnvironmentReader; use sqlexec::errors::ExecError; use sqlexec::remote::client::RemoteClientType; use sqlexec::session::ExecutionResult; @@ -125,6 +125,21 @@ impl ConnectOptionsBuilder { self } + pub fn cloud_addr_opt(&mut self, v: Option) -> &mut Self { + self.cloud_addr = Some(v); + self + } + + pub fn disable_tls_opt(&mut self, v: Option) -> &mut Self { + self.disable_tls = Some(v); + self + } + + pub fn storage_options_opt(&mut self, v: Option>) -> &mut Self { + self.storage_options = v; + self + } + /// Constructs an in-memory connection configuration, which can be /// used for default operations and tests without impacting the /// file system. All state (tables, catalog, etc.) is local, but @@ -299,7 +314,7 @@ impl From> for RecordStream { impl RecordStream { // Collects all of the record batches in a stream, aborting if // there are any errors. - pub async fn all(&mut self) -> Result, DataFusionError> { + pub async fn to_vec(&mut self) -> Result, DataFusionError> { let mut out = Vec::new(); let stream = &mut self.0; From 1a306e27ad0b3a243687d90c4e20825b5aacc961 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 12 Apr 2024 16:18:58 -0400 Subject: [PATCH 33/34] copypasta --- crates/glaredb/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/glaredb/src/lib.rs b/crates/glaredb/src/lib.rs index 627a8df35..963a8c18d 100644 --- a/crates/glaredb/src/lib.rs +++ b/crates/glaredb/src/lib.rs @@ -472,7 +472,7 @@ impl Operation { self.plan.take().unwrap() } else { self.schema = None; - ses.prql_to_lp(&self.query).await? + ses.create_logical_plan(&self.query).await? }; let schema = if self.schema.is_some() { From 3f472a52afaf855729d54f4eeed7e5e9a36b8172 Mon Sep 17 00:00:00 2001 From: tycho garen Date: Fri, 12 Apr 2024 17:35:28 -0400 Subject: [PATCH 34/34] fix type names --- bindings/nodejs/glaredb.js | 2 +- bindings/nodejs/index.d.ts | 6 +++--- bindings/nodejs/index.js | 4 ++-- bindings/nodejs/src/connection.rs | 6 +++--- bindings/nodejs/src/execution.rs | 8 ++++---- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/bindings/nodejs/glaredb.js b/bindings/nodejs/glaredb.js index afba26f63..6e8a8d9cf 100644 --- a/bindings/nodejs/glaredb.js +++ b/bindings/nodejs/glaredb.js @@ -3,7 +3,7 @@ const glaredb = require('./index.js') // Some methods can't be performed through `n-api` // So we need to monkey patch them here // The methods should still be defined in rust so we can keep a consistent `index.d.ts` file. 
-Object.assign(glaredb.JsLogicalPlan.prototype, { +Object.assign(glaredb.JsExecutionOutput.prototype, { async toPolars() { try { const pl = require("nodejs-polars") diff --git a/bindings/nodejs/index.d.ts b/bindings/nodejs/index.d.ts index 063a462c1..af1e75c0f 100644 --- a/bindings/nodejs/index.d.ts +++ b/bindings/nodejs/index.d.ts @@ -61,7 +61,7 @@ export class Connection { * await con.sql('create table my_table (a int)').then(cursor => cursor.execute()) * ``` */ - sql(query: string): Promise + sql(query: string): Promise /** * Run a PRQL query against a GlareDB database. Does not change * the state or dialect of the connection object. @@ -77,7 +77,7 @@ export class Connection { * All operations execute lazily when their results are * processed. */ - prql(query: string): Promise + prql(query: string): Promise /** * Execute a query. * @@ -96,7 +96,7 @@ export class Connection { /** Close the current session. */ close(): Promise } -export class JsExecution { +export class JsExecutionOutput { toString(): string show(): Promise execute(): Promise diff --git a/bindings/nodejs/index.js b/bindings/nodejs/index.js index 66fa9f103..7fa62fd92 100644 --- a/bindings/nodejs/index.js +++ b/bindings/nodejs/index.js @@ -252,8 +252,8 @@ if (!nativeBinding) { throw new Error(`Failed to load native binding`) } -const { connect, Connection, JsExecution } = nativeBinding +const { connect, Connection, JsExecutionOutput } = nativeBinding module.exports.connect = connect module.exports.Connection = Connection -module.exports.JsExecution = JsExecution +module.exports.JsExecutionOutput = JsExecutionOutput diff --git a/bindings/nodejs/src/connection.rs b/bindings/nodejs/src/connection.rs index 991df57f0..07e4d6939 100644 --- a/bindings/nodejs/src/connection.rs +++ b/bindings/nodejs/src/connection.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use async_once_cell::OnceCell; use crate::error::JsGlareDbError; -use crate::execution::JsExecution; +use crate::execution::JsExecutionOutput; /// A connected session to a GlareDB database. #[napi] @@ -76,7 +76,7 @@ impl Connection { /// await con.sql('create table my_table (a int)').then(cursor => cursor.execute()) /// ``` #[napi(catch_unwind)] - pub async fn sql(&self, query: String) -> napi::Result { + pub async fn sql(&self, query: String) -> napi::Result { Ok(self .inner .sql(query) @@ -100,7 +100,7 @@ impl Connection { /// All operations execute lazily when their results are /// processed. #[napi(catch_unwind)] - pub async fn prql(&self, query: String) -> napi::Result { + pub async fn prql(&self, query: String) -> napi::Result { Ok(self .inner .prql(query) diff --git a/bindings/nodejs/src/execution.rs b/bindings/nodejs/src/execution.rs index 56d5dee16..4dbf12499 100644 --- a/bindings/nodejs/src/execution.rs +++ b/bindings/nodejs/src/execution.rs @@ -9,11 +9,11 @@ use crate::error::JsGlareDbError; #[napi] #[derive(Clone, Debug)] -pub struct JsExecution { +pub struct JsExecutionOutput { op: Arc>, } -impl From for JsExecution { +impl From for JsExecutionOutput { fn from(opt: glaredb::Operation) -> Self { Self { op: Arc::new(Mutex::new(opt)), @@ -21,7 +21,7 @@ impl From for JsExecution { } } -impl JsExecution { +impl JsExecutionOutput { pub(crate) async fn to_arrow_inner(&self) -> napi::Result> { let mut op = self.op.lock().unwrap().clone(); Ok(async move { @@ -44,7 +44,7 @@ impl JsExecution { } #[napi] -impl JsExecution { +impl JsExecutionOutput { #[napi(catch_unwind)] pub fn to_string(&self) -> napi::Result { Ok(format!("{:?}", self.op.lock().unwrap()))
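Taken together, these patches leave the Rust SDK as the single implementation behind the Node bindings. A usage sketch against the API as it stands at the end of the series (method names taken from the diffs above; tokio is an assumed runtime, and the error plumbing is simplified):

    #[tokio::main]
    async fn main() -> Result<(), Box<dyn std::error::Error>> {
        let conn = glaredb::ConnectOptions::new_in_memory().connect().await?;

        // DDL and writes run eagerly inside evaluate().
        let _ = conn.execute("CREATE TABLE t (a INT)").evaluate().await?;
        let _ = conn.execute("INSERT INTO t VALUES (1), (2)").evaluate().await?;

        // Reads stay lazy: evaluate() plans and caches the schema,
        // call() yields a RecordStream, and to_vec() drives it.
        let mut op = conn.sql("SELECT a FROM t ORDER BY a");
        op.evaluate().await?;
        let batches = op.call().to_vec().await?;
        println!("got {} record batch(es)", batches.len());
        Ok(())
    }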