risingwavelabs · TennyZhuang · Mar 15, 2023 · Mar 15, 2023 · Mar 15, 2023 · Mar 15, 2023
diff --git a/Cargo.toml b/Cargo.toml
@@ -35,6 +35,7 @@ members = [
   "src/tests/regress",
   "src/tests/simulation",
   "src/tests/sqlsmith",
+  "src/tests/state_cleaning_test",
   "src/tracing",
   "src/udf",
   "src/utils/local_stats_alloc",

diff --git a/src/meta/Cargo.toml b/src/meta/Cargo.toml
@@ -64,7 +64,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [
     "signal",
 ] }
 tokio-retry = "0.3"
-tokio-stream = { version = "0.1", features = ["net"] }
+tokio-stream = { git = "https://github.com/madsim-rs/tokio.git", rev = "0c25710", features = ["net"] }
 tonic = { version = "0.2", package = "madsim-tonic" }
 tower = { version = "0.4", features = ["util", "load-shed"] }
 tracing = "0.1"

diff --git a/src/stream/src/error.rs b/src/stream/src/error.rs
@@ -36,7 +36,7 @@ enum Inner {
     #[error("Array/Chunk error: {0}")]
     Array(ArrayError),
 
-    #[error("Executor error: {0}")]
+    #[error("Executor error: {0:?}")]
     Executor(Box<StreamExecutorError>),
 
     #[error(transparent)]

diff --git a/src/tests/state_cleaning_test/Cargo.toml b/src/tests/state_cleaning_test/Cargo.toml
@@ -0,0 +1,37 @@
+[package]
+name = "risingwave_state_cleaning_test"
+version = { workspace = true }
+edition = { workspace = true }
+homepage = { workspace = true }
+keywords = { workspace = true }
+license = { workspace = true }
+repository = { workspace = true }
+
+[package.metadata.cargo-machete]
+ignored = ["workspace-hack"]
+
+[package.metadata.cargo-udeps.ignore]
+normal = ["workspace-hack"]
+
+[dependencies]
+anyhow = "1"
+chrono = "0.4"
+clap = { version = "4", features = ["derive"] }
+futures = { version = "0.3", default-features = false, features = ["alloc"] }
+itertools = "0.10"
+regex = "1"
+risingwave_rt = { path = "../../utils/runtime" }
+serde = { version = "1", features = ["derive"] }
+serde_with = "2"
+tokio = { version = "0.2", package = "madsim-tokio" }
+tokio-postgres = "0.7.7"
+tokio-stream = { version = "0.1", features = ["fs"] }
+toml = "0.4"
+tracing = "0.1"
+
+[target.'cfg(not(madsim))'.dependencies]
+workspace-hack = { path = "../../workspace-hack" }
+
+[[bin]]
+name = "risingwave_state_cleaning_test"
+path = "src/bin/main.rs"
diff --git a/src/tests/state_cleaning_test/README.md b/src/tests/state_cleaning_test/README.md
@@ -0,0 +1,18 @@
+# risingwave_state_cleaning_test
+
+The `risingwave_state_cleaning_test` crate is designed specifically to test whether RisingWave cleans outdated state records (those that have fallen behind the watermark) in a timely manner. The tests to execute are described in TOML files. By using this crate, developers can ensure that RisingWave properly manages state records, thereby improving overall application performance and providing a more reliable end-user experience.
+
+## TOML files
+
+The TOML files describe the tests that should be run. Each test is an entry in the `test` array of tables (`[[test]]`) with the following format:
+
+```toml
+[[test]]
+name = "test name" # A human-readable name for the test
+init_sqls = [ "SQL statement 1", "SQL statement 2", ... ] # A list of SQL statements to prepare the test environment
+bound_tables = [
+    { pattern = "table name pattern", limit = number }, # A regular expression matching table names, and a limit on the number of rows for each matching table
+    { pattern = "table name pattern", limit = number },
+    ...
+] # A list of tables that should be checked.
+```
diff --git a/src/tests/state_cleaning_test/data/agg.toml b/src/tests/state_cleaning_test/data/agg.toml
@@ -0,0 +1,42 @@
+[[test]]
+name = "window_hash_agg"
+# Prepare the testing table & mviews.
+init_sqls = [
+    # Set up the base table.
+    """
+    CREATE TABLE t1 (
+        created_at timestamp,
+        grp int,
+        v int,
+        WATERMARK FOR created_at AS created_at - interval '9' second
+    ) APPEND ONLY WITH (
+        connector = 'datagen',
+        rows_per_second = 100,
+        datagen.split.num = 16,
+        fields.created_at.max_past_mode = 'relative',
+        fields.created_at.max_past = '10s',
+        fields.grp.min = 0,
+        fields.grp.max = 5,
+    );
+    """,
+    # Set up the tumble window mview.
+    """
+    CREATE MATERIALIZED VIEW mv_tumble AS
+    SELECT grp, SUM(v), window_start
+    FROM tumble(t1, created_at, INTERVAL '1' SECOND)
+    GROUP BY window_start, grp;
+    """,
+    # Set up the hop window mview.
+    """
+    CREATE MATERIALIZED VIEW mv_hop AS
+    SELECT grp, SUM(v), window_start
+    FROM hop(t1, created_at, INTERVAL '1' SECOND, INTERVAL '3' SECOND)
+    GROUP BY window_start, grp;
+    """,
+]
+bound_tables = [
+    # Tumble window agg state table.
+    { pattern = '__internal_mv_tumble_\d+_hashaggresult_\d+', limit = 200 },
+    # Hop window agg state table.
+    { pattern = '__internal_mv_hop_\d+_hashaggresult_\d+', limit = 400 },
+]