Merge branch 'yiming/commit-partial-table' into yiming/snapshot-backfill

risingwavelabs · Aug 15, 2024 · c59e800 · c59e800
2 parents 24b0909 + deb33db
commit c59e800
Show file tree

Hide file tree

Showing 37 changed files with 525 additions and 94 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -77,7 +77,7 @@ license = "Apache-2.0"
 repository = "https://github.com/risingwavelabs/risingwave"
 
 [workspace.dependencies]
-foyer = { version = "0.10.1", features = ["nightly", "mtrace"] }
+foyer = { version = "0.10.4", features = ["nightly", "mtrace"] }
 apache-avro = { git = "https://github.com/risingwavelabs/avro", rev = "25113ba88234a9ae23296e981d8302c290fdaa4b", features = [
     "snappy",
     "zstandard",

diff --git a/README.md b/README.md
@@ -56,7 +56,7 @@
 
 RisingWave is a Postgres-compatible SQL engine engineered to provide the <i><b>simplest</b></i> and <i><b>most cost-efficient</b></i> approach for <b>processing</b>, <b>analyzing</b>, and <b>managing</b> real-time event streaming data.
 
-![RisingWave](https://github.com/risingwavelabs/risingwave/assets/41638002/10c44404-f78b-43ce-bbd9-3646690acc59)
+![RisingWave](./docs/dev/src/images/architecture_20240814.png)
 
 ## When to use RisingWave?
 RisingWave can ingest millions of events per second, continuously join live data streams with historical tables, and serve ad-hoc queries in real-time. Typical use cases include, but are not limited to:

diff --git a/ci/scripts/run-e2e-test.sh b/ci/scripts/run-e2e-test.sh
@@ -90,9 +90,7 @@ echo "--- e2e, $mode, batch"
 RUST_LOG="info,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info" \
 cluster_start
 sqllogictest -p 4566 -d dev './e2e_test/ddl/**/*.slt' --junit "batch-ddl-${profile}" --label "can-use-recover"
-if [[ "$mode" != "single-node" ]]; then
-  sqllogictest -p 4566 -d dev './e2e_test/background_ddl/basic.slt' --junit "batch-ddl-${profile}"
-fi
+sqllogictest -p 4566 -d dev './e2e_test/background_ddl/basic.slt' --junit "batch-ddl-${profile}"
 
 if [[ $mode != "single-node" ]]; then
   sqllogictest -p 4566 -d dev './e2e_test/visibility_mode/*.slt' --junit "batch-${profile}"

diff --git a/docs/dev/src/images/architecture_20240814.png b/docs/dev/src/images/architecture_20240814.png
diff --git a/e2e_test/sink/license.slt b/e2e_test/sink/license.slt
@@ -0,0 +1,195 @@
+statement ok
+SET RW_IMPLICIT_FLUSH TO true;
+
+statement ok
+ALTER SYSTEM SET license_key TO '';
+
+statement ok
+CREATE TABLE t (k INT);
+
+statement error
+CREATE SINK dynamodb_sink
+FROM
+  t
+WITH
+(
+  connector = 'dynamodb',
+  table = 'xx',
+  primary_key = 'k',
+  region = 'xx',
+  access_key = 'xx',
+  secret_key = 'xx'
+);
+----
+db error: ERROR: Failed to run the query
+
+Caused by these errors (recent errors listed first):
+  1: gRPC request to meta service failed: Internal error
+  2: failed to validate sink
+  3: Internal error
+  4: feature DynamoDbSink is only available for tier Paid and above, while the current tier is Free
+
+Hint: You may want to set a license key with `ALTER SYSTEM SET license_key = '...';` command.
+
+
+statement error
+CREATE SINK snowflake_sink
+FROM t
+WITH (
+    connector = 'snowflake',
+    type = 'append-only',
+    force_append_only = 'true',
+    s3.bucket_name = 'xx',
+    s3.credentials.access = 'xx',
+    s3.credentials.secret = 'xx',
+    s3.region_name = 'xx',
+    s3.path = 'xx',
+);
+----
+db error: ERROR: Failed to run the query
+
+Caused by these errors (recent errors listed first):
+  1: gRPC request to meta service failed: Internal error
+  2: failed to validate sink
+  3: Internal error
+  4: feature SnowflakeSink is only available for tier Paid and above, while the current tier is Free
+
+Hint: You may want to set a license key with `ALTER SYSTEM SET license_key = '...';` command.
+
+
+statement error
+CREATE SINK opensearch_sink
+FROM t
+WITH (
+    connector = 'opensearch',
+    url = 'xx',
+    username = 'xx',
+    password = 'xx',
+);
+----
+db error: ERROR: Failed to run the query
+
+Caused by these errors (recent errors listed first):
+  1: gRPC request to meta service failed: Internal error
+  2: failed to validate sink
+  3: feature OpenSearchSink is only available for tier Paid and above, while the current tier is Free
+
+Hint: You may want to set a license key with `ALTER SYSTEM SET license_key = '...';` command.
+
+
+statement error
+CREATE SINK bigquery_sink
+FROM
+  t
+WITH
+(
+  connector = 'bigquery',
+  type = 'append-only',
+  force_append_only='true',
+  bigquery.local.path= 'xx',
+  bigquery.project= 'xx',
+  bigquery.dataset= 'xx',
+  bigquery.table= 'xx'
+);
+----
+db error: ERROR: Failed to run the query
+
+Caused by these errors (recent errors listed first):
+  1: gRPC request to meta service failed: Internal error
+  2: failed to validate sink
+  3: Internal error
+  4: feature BigQuerySink is only available for tier Paid and above, while the current tier is Free
+
+Hint: You may want to set a license key with `ALTER SYSTEM SET license_key = '...';` command.
+
+
+statement ok
+ALTER SYSTEM SET license_key TO DEFAULT;
+
+statement ok
+flush;
+
+statement error
+CREATE SINK dynamodb_sink
+FROM
+  t
+WITH
+(
+  connector = 'dynamodb',
+  table = 'xx',
+  primary_key = 'xx',
+  region = 'xx',
+  access_key = 'xx',
+  secret_key = 'xx'
+);
+----
+db error: ERROR: Failed to run the query
+
+Caused by these errors (recent errors listed first):
+  1: Sink error
+  2: Sink primary key column not found: xx. Please use ',' as the delimiter for different primary key columns.
+
+
+statement ok
+CREATE SINK snowflake_sink
+FROM t
+WITH (
+    connector = 'snowflake',
+    type = 'append-only',
+    force_append_only = 'true',
+    s3.bucket_name = 'xx',
+    s3.credentials.access = 'xx',
+    s3.credentials.secret = 'xx',
+    s3.region_name = 'xx',
+    s3.path = 'xx',
+);
+
+
+statement error
+CREATE SINK opensearch_sink
+FROM t
+WITH (
+    connector = 'opensearch',
+    url = 'xx',
+    username = 'xx',
+    password = 'xx',
+    index = 'xx',
+);
+----
+db error: ERROR: Failed to run the query
+
+Caused by these errors (recent errors listed first):
+  1: gRPC request to meta service failed: Internal error
+  2: failed to validate sink
+  3: sink cannot pass validation: INTERNAL: Connection is closed
+
+
+statement error
+CREATE SINK bigquery_sink
+FROM
+  t
+WITH
+(
+  connector = 'bigquery',
+  type = 'append-only',
+  force_append_only='true',
+  bigquery.local.path= 'xx',
+  bigquery.project= 'xx',
+  bigquery.dataset= 'xx',
+  bigquery.table= 'xx'
+);
+----
+db error: ERROR: Failed to run the query
+
+Caused by these errors (recent errors listed first):
+  1: gRPC request to meta service failed: Internal error
+  2: failed to validate sink
+  3: BigQuery error
+  4: No such file or directory (os error 2)
+
+
+statement ok
+DROP SINK snowflake_sink;
+
+statement ok
+DROP TABLE t;
diff --git a/e2e_test/streaming/aggregate/two_phase_approx_percentile_merge_stateful_agg.slt b/e2e_test/streaming/aggregate/two_phase_approx_percentile_merge_stateful_agg.slt
@@ -0,0 +1,80 @@
+# Two-phase approx percentile merged with stateful aggregates
+statement ok
+create table t(p_col double, grp_col int);
+
+statement ok
+insert into t select a, 1 from generate_series(-1000, 1000) t(a);
+
+statement ok
+flush;
+
+query I
+select
+  percentile_cont(0.01) within group (order by p_col) as p01,
+  min(p_col),
+  percentile_cont(0.5) within group (order by p_col) as p50,
+  count(*),
+  percentile_cont(0.99) within group (order by p_col) as p99
+from t;
+----
+-980 -1000 0 2001 980
+
+statement ok
+create materialized view m1 as
+ select
+     approx_percentile(0.01, 0.01) within group (order by p_col) as p01,
+     min(p_col),
+     approx_percentile(0.5, 0.01) within group (order by p_col) as p50,
+     count(*),
+     approx_percentile(0.99, 0.01) within group (order by p_col) as p99
+ from t;
+
+query I
+select * from m1;
+----
+-982.5779489474152 -1000 0 2001 982.5779489474152
+
+# Test state encode / decode
+onlyif can-use-recover
+statement ok
+recover;
+
+onlyif can-use-recover
+sleep 10s
+
+query I
+select * from m1;
+----
+-982.5779489474152 -1000 0 2001 982.5779489474152
+
+# Test 0<x<1 values
+statement ok
+insert into t select 0.001, 1 from generate_series(1, 500);
+
+statement ok
+insert into t select 0.0001, 1 from generate_series(1, 501);
+
+statement ok
+flush;
+
+query I
+select * from m1;
+----
+-963.1209598593477 -1000 0.00009999833511933609 3002 963.1209598593477
+
+query I
+select
+  percentile_cont(0.01) within group (order by p_col) as p01,
+  min(p_col),
+  percentile_cont(0.5) within group (order by p_col) as p50,
+  count(*),
+  percentile_cont(0.99) within group (order by p_col) as p99
+from t;
+----
+-969.99 -1000 0.0001 3002 969.9899999999998
+
+statement ok
+drop materialized view m1;
+
+statement ok
+drop table t;
diff --git a/e2e_test/streaming/aggregate/two_phase_approx_percentile_merge_normal_agg.slt b/e2e_test/streaming/aggregate/two_phase_approx_percentile_merge_stateless_agg.slt
@@ -47,19 +47,6 @@ select * from m1;
 ----
 -982.5779489474152 0 0 2001 982.5779489474152
 
-# Test state encode / decode
-onlyif can-use-recover
-statement ok
-recover;
-
-onlyif can-use-recover
-sleep 10s
-
-query I
-select * from m1;
-----
--982.5779489474152 0 0 2001 982.5779489474152
-
 # Test 0<x<1 values
 statement ok
 insert into t select 0.001, 1 from generate_series(1, 500);

diff --git a/e2e_test/streaming/union.slt b/e2e_test/streaming/union.slt
@@ -152,6 +152,9 @@ Caused by:
   Invalid input syntax: When CORRESPONDING is specified, at least one column of the left side shall have a column name that is the column name of some column of the right side in a UNION operation. Left side query column list: ("v1", "v2", "v4"). Right side query column list: ("vxx").
 
 
+statement ok
+drop table txx;
+
 statement ok
 drop table t1;