feat(sqlsmith, deterministic-test): deterministic fuzz stability #7967

Merged · 25 commits · Feb 24, 2023 · Changes from all commits
1 change: 1 addition & 0 deletions .typos.toml
@@ -15,4 +15,5 @@ extend-exclude = [
"**/*.svg",
"scripts",
"src/frontend/planner_test/tests/testdata",
"src/tests/sqlsmith/tests/freeze",
]
1 change: 1 addition & 0 deletions ci/scripts/cron-e2e-test.sh
@@ -7,4 +7,5 @@ source ci/scripts/common.env.sh
export RUN_COMPACTION=1;
export RUN_META_BACKUP=1;
export RUN_DELETE_RANGE=1;
export RUN_DETERMINISTIC_SQLSMITH=1;
source ci/scripts/run-e2e-test.sh
10 changes: 8 additions & 2 deletions ci/scripts/deterministic-e2e-test.sh
@@ -4,6 +4,7 @@
set -euo pipefail

source ci/scripts/common.env.sh
source ci/scripts/pr.env.sh

echo "--- Download artifacts"
buildkite-agent artifact download risingwave_simulation .
@@ -38,5 +39,10 @@ seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation -j 16 ./e2
echo "--- deterministic simulation e2e, ci-3cn-2fe, parallel, batch"
seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation -j 16 ./e2e_test/batch/\*\*/\*.slt 2> $LOGDIR/parallel-batch-{}.log && rm $LOGDIR/parallel-batch-{}.log'

echo "--- deterministic simulation e2e, ci-3cn-2fe, fuzzing"
seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --sqlsmith 100 ./src/tests/sqlsmith/tests/testdata 2> $LOGDIR/fuzzing-{}.log && rm $LOGDIR/fuzzing-{}.log'
echo "--- deterministic simulation e2e, ci-3cn-2fe, fuzzing (pre-generated-queries)"
seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --run-sqlsmith-queries src/tests/sqlsmith/tests/freeze/{} 2> $LOGDIR/fuzzing-{}.log && rm $LOGDIR/fuzzing-{}.log'

if [[ "$RUN_DETERMINISTIC_SQLSMITH" -eq "1" ]]; then
echo "--- deterministic simulation e2e, ci-3cn-2fe, fuzzing (seed)"
seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --sqlsmith 100 ./src/tests/sqlsmith/tests/testdata 2> $LOGDIR/fuzzing-{}.log && rm $LOGDIR/fuzzing-{}.log'
fi
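Because madsim derives all randomness from `MADSIM_TEST_SEED`, any failing fuzzing run above can be replayed exactly. A minimal sketch for local reproduction, assuming the `risingwave_simulation` artifact has been downloaded as in this script (the seed value 7 is illustrative, taken from a failing log's filename):

    # Replay seed 7 of the seed-based fuzzing step
    MADSIM_TEST_SEED=7 ./risingwave_simulation --sqlsmith 100 ./src/tests/sqlsmith/tests/testdata

    # Replay seed 7 of the pre-generated-queries step
    MADSIM_TEST_SEED=7 ./risingwave_simulation --run-sqlsmith-queries src/tests/sqlsmith/tests/freeze/7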
2 changes: 2 additions & 0 deletions ci/scripts/pr.env.sh
@@ -20,6 +20,8 @@ export RUN_COMPACTION=0;
export RUN_META_BACKUP=0;
# Don't run delete-range random test
export RUN_DELETE_RANGE=0;
# Don't run deterministic e2e fuzzing (only run pre-gen)
export RUN_DETERMINISTIC_SQLSMITH=0;

if [[ -n "$CHANGED" ]]; then
echo "origin/main SHA: $(git rev-parse origin/main)";
46 changes: 44 additions & 2 deletions src/tests/simulation/src/main.rs
@@ -111,6 +111,20 @@ pub struct Args {
#[clap(long)]
sqlsmith: Option<usize>,

/// Run pre-generated sqlsmith queries from the given [`files`] directory,
/// containing `ddl.sql` and `queries.sql`.
#[clap(long)]
run_sqlsmith_queries: bool,

/// Run sqlsmith to generate queries with the given testdata [`files`],
/// and output the ddl + queries to the directory
/// indicated by this argument.
/// We generate sqlsmith queries via `madsim` because
/// it provides a degree of determinism, and we can spawn several
/// instances in parallel.
#[clap(long)]
generate_sqlsmith_queries: Option<String>,

/// Load etcd data from toml file.
#[clap(long)]
etcd_data: Option<PathBuf>,
@@ -167,15 +181,43 @@ async fn main() {
cluster.create_kafka_producer(&datadir).await;
}

let seed = madsim::runtime::Handle::current().seed();
if let Some(count) = args.sqlsmith {
cluster
.run_on_client(async move {
let seed = madsim::runtime::Handle::current().seed();
let rw = RisingWave::connect("frontend".into(), "dev".into())
.await
.unwrap();
risingwave_sqlsmith::runner::run(rw.pg_client(), &args.files, count, Some(seed))
if let Some(outdir) = args.generate_sqlsmith_queries {
risingwave_sqlsmith::runner::generate(
rw.pg_client(),
&args.files,
count,
&outdir,
)
.await;
} else {
risingwave_sqlsmith::runner::run(
rw.pg_client(),
&args.files,
count,
Some(seed),
)
.await;
}
})
.await;
return;
}

    if args.run_sqlsmith_queries {
        let outdir = args.files;
        cluster
            .run_on_client(async move {
                let rw = RisingWave::connect("frontend".into(), "dev".into())
                    .await
                    .unwrap();
                risingwave_sqlsmith::runner::run_pre_generated(rw.pg_client(), &outdir).await;
            })
            .await;
        return;
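Taken together, the new flags give the simulator three modes: `--sqlsmith N` fuzzes with freshly generated queries, adding `--generate-sqlsmith-queries <outdir>` writes the generated ddl + queries out instead of only running them, and `--run-sqlsmith-queries` replays a previously written directory. A sketch of the generation mode (seed and output path are illustrative):

    MADSIM_TEST_SEED=1 ./risingwave_simulation \
        --sqlsmith 100 \
        --generate-sqlsmith-queries src/tests/sqlsmith/tests/freeze/1 \
        ./src/tests/sqlsmith/tests/testdata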
42 changes: 42 additions & 0 deletions src/tests/sqlsmith/scripts/gen_queries.sh
@@ -0,0 +1,42 @@
#!/usr/bin/env bash

set -euxo pipefail

export TEST_NUM=32
export RW_HOME="../../../.."
export LOGDIR=".risingwave/log"
export TESTS_FOLDER="src/tests/sqlsmith/tests"
export OUTDIR="$TESTS_FOLDER/freeze"
export TESTDATA="src/tests/sqlsmith/tests/testdata"
export MADSIM_BIN="target/sim/ci-sim/risingwave_simulation"

build_madsim() {
    cargo make sslt-build-all --profile ci-sim
}

generate_deterministic() {
    seq "$TEST_NUM" | \
        parallel "mkdir -p $OUTDIR/{}; \
            MADSIM_TEST_SEED={} $MADSIM_BIN \
                --sqlsmith 100 \
                --generate-sqlsmith-queries $OUTDIR/{} \
                $TESTDATA \
                2> $LOGDIR/fuzzing-{}.log && rm $LOGDIR/fuzzing-{}.log"
}

generate_sqlsmith() {
    mkdir -p "$OUTDIR/$1"
    ./risedev d
    ./target/debug/sqlsmith test \
        --testdata ./src/tests/sqlsmith/tests/testdata \
        --generate "$OUTDIR/$1"
}

main() {
    cd $RW_HOME
    build_madsim
    generate_deterministic
    cd -
}

main
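Run from the scripts directory, this regenerates the frozen corpus, one numbered subdirectory per seed. The expected layout, inferred from `OUTDIR` and `generate_deterministic` above:

    src/tests/sqlsmith/tests/freeze/
    ├── 1/
    │   ├── ddl.sql
    │   └── queries.sql
    ├── 2/
    │   └── ...
    └── 32/

Note that `generate_sqlsmith` generates a single corpus against a locally running cluster instead of the simulator, but is not called from `main`.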
16 changes: 13 additions & 3 deletions src/tests/sqlsmith/src/bin/main.rs
@@ -17,7 +17,7 @@ use std::time::Duration;

use clap::Parser as ClapParser;
use risingwave_sqlsmith::print_function_table;
use risingwave_sqlsmith::runner::run;
use risingwave_sqlsmith::runner::{generate, run};
use tokio_postgres::NoTls;

#[derive(ClapParser, Debug, Clone)]
@@ -56,6 +56,11 @@ struct TestOptions {
/// The number of test cases to generate.
#[clap(long, default_value = "100")]
count: usize,

/// Output directory, only applicable if we are generating
/// queries while testing.
#[clap(long)]
generate: Option<String>,
}

#[derive(clap::Subcommand, Clone, Debug)]
@@ -73,7 +78,8 @@ async fn main() {
tracing_subscriber::fmt::init();

let opt = Opt::parse();
let opt = match opt.command {
let command = opt.command;
let opt = match command {
Commands::PrintFunctionTable => {
println!("{}", print_function_table());
return;
@@ -95,5 +101,9 @@ async fn main() {
tracing::error!("Postgres connection error: {:?}", e);
}
});
run(&client, &opt.testdata, opt.count, None).await;
if let Some(outdir) = opt.generate {
generate(&client, &opt.testdata, opt.count, &outdir).await;
} else {
run(&client, &opt.testdata, opt.count, None).await;
}
}
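With the new `--generate` option, the standalone sqlsmith binary can produce the same frozen files outside the simulator (this is what `generate_sqlsmith` in gen_queries.sh does). A sketch, assuming a frontend is already running and reachable with the binary's default connection options (the output directory is illustrative):

    ./target/debug/sqlsmith test \
        --testdata ./src/tests/sqlsmith/tests/testdata \
        --count 100 \
        --generate out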
113 changes: 111 additions & 2 deletions src/tests/sqlsmith/src/runner.rs
@@ -13,6 +13,9 @@
// limitations under the License.

//! Provides E2E Test runner functionality.
use std::fs::File;
use std::io::Write;
use std::path::Path;

use itertools::Itertools;
use rand::rngs::SmallRng;
Expand All @@ -28,6 +31,106 @@ use crate::{
sql_gen, Table,
};

/// e2e test runner for pre-generated queries from sqlsmith
pub async fn run_pre_generated(client: &tokio_postgres::Client, outdir: &str) {
    let ddl_path = format!("{}/ddl.sql", outdir);
    let queries_path = format!("{}/queries.sql", outdir);
    let ddl = std::fs::read_to_string(ddl_path).unwrap();
    let queries = std::fs::read_to_string(queries_path).unwrap();
    let mut setup_sql = String::with_capacity(1000);
    for ddl_statement in parse_sql(&ddl) {
        let sql = ddl_statement.to_string();
        tracing::info!("Executing: {}", sql);
        let response = client.execute(&sql, &[]).await;
        if let Err(e) = response {
            panic!("{}", format_fail_reason(&setup_sql, &sql, &e))
        }
        setup_sql.push_str(&sql);
    }
    for statement in parse_sql(&queries) {
        let sql = statement.to_string();
        tracing::info!("Executing: {}", sql);
        let response = client.query(&sql, &[]).await;
        if let Err(e) = response {
            panic!("{}", format_fail_reason(&setup_sql, &sql, &e))
        }
    }
}
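Since the frozen files are plain SQL, a corpus can also be replayed by hand against a running cluster. A sketch, assuming the default risedev frontend address and user (host, port, and user are assumptions, not taken from this PR):

    psql -h localhost -p 4566 -d dev -U root -f src/tests/sqlsmith/tests/freeze/1/ddl.sql
    psql -h localhost -p 4566 -d dev -U root -f src/tests/sqlsmith/tests/freeze/1/queries.sql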

/// e2e query generator.
/// The goal is to generate NON-FAILING queries.
/// If we encounter an expected error, just skip it.
/// If we panic or encounter an unexpected error, query generation
/// should still fail.
/// Writes the resulting ddl and queries to `outdir`.

Review comment (Member, on the NON-FAILING goal above): If we can support ignoring cases, this no longer seems necessary. In my mind, we can generate cases regardless of success or failure, and then mark the unsupported ones to be ignored.

Reply (Author): Agree, plan to work on this in a separate PR.
pub async fn generate(client: &tokio_postgres::Client, testdata: &str, count: usize, outdir: &str) {
    let mut rng = rand::rngs::SmallRng::from_entropy();
    let (tables, base_tables, mviews, setup_sql) = create_tables(&mut rng, testdata, client).await;

    let rows_per_table = 10;
    let max_rows_inserted = rows_per_table * base_tables.len();
    test_sqlsmith(
        client,
        &mut rng,
        tables.clone(),
        &setup_sql,
        base_tables,
        max_rows_inserted,
    )
    .await;
    tracing::info!("Passed sqlsmith tests");

    let mut queries = String::with_capacity(10000);
    let mut generated_queries = 0;
    for _ in 0..count {
        // ENABLE: https://github.com/risingwavelabs/risingwave/issues/7928
        // test_session_variable(client, rng).await;
        let sql = sql_gen(&mut rng, tables.clone());
        tracing::info!("Executing: {}", sql);
        let response = client.query(sql.as_str(), &[]).await;
        let skipped = validate_response(&setup_sql, &format!("{};", sql), response);
        if skipped == 0 {
            generated_queries += 1;
            queries.push_str(&format!("{};\n", &sql));
        }
    }
    tracing::info!("Generated {} batch queries", generated_queries);

    let mut generated_queries = 0;
    for _ in 0..count {
        // ENABLE: https://github.com/risingwavelabs/risingwave/issues/7928
        // test_session_variable(client, rng).await;
        let (sql, table) = mview_sql_gen(&mut rng, tables.clone(), "stream_query");
        tracing::info!("Executing: {}", sql);
        let response = client.query(&sql, &[]).await;
        let skipped = validate_response(&setup_sql, &format!("{};", sql), response);
        drop_mview_table(&table, client).await;
        if skipped == 0 {
            generated_queries += 1;
            queries.push_str(&format!("{};\n", &sql));
            queries.push_str(&format!("{};\n", format_drop_mview(&table)));
        }
    }
    tracing::info!("Generated {} stream queries", generated_queries);

    drop_tables(&mviews, testdata, client).await;
    write_to_file(outdir, "ddl.sql", &setup_sql);
    write_to_file(outdir, "queries.sql", &queries);
}

fn write_to_file(outdir: &str, name: &str, sql: &str) {
    let resolved = format!("{}/{}", outdir, name);
    let path = Path::new(&resolved);
    let mut file = match File::create(path) {
        Err(e) => panic!("couldn't create {}: {}", path.display(), e),
        Ok(file) => file,
    };
    match file.write_all(sql.as_bytes()) {
        Err(why) => panic!("couldn't write to {}: {}", path.display(), why),
        Ok(_) => tracing::info!("successfully wrote to {}", path.display()),
    }
}

/// e2e test runner for sqlsmith
pub async fn run(client: &tokio_postgres::Client, testdata: &str, count: usize, seed: Option<u64>) {
#[cfg(madsim)]
@@ -222,6 +325,8 @@ fn get_seed_table_sql(testdata: &str) -> String {
.collect::<String>()
}

/// Create the tables defined in testdata, along with some mviews.
/// TODO: Generate indexes and sinks.
async fn create_tables(
rng: &mut impl Rng,
testdata: &str,
@@ -243,7 +348,7 @@
let create_sql = stmt.to_string();
tracing::info!("[EXECUTING CREATE TABLE]: {}", &create_sql);
client.simple_query(&create_sql).await.unwrap();
setup_sql.push_str(&format!("{};", &create_sql));
setup_sql.push_str(&format!("{};\n", &create_sql));
}

let mut mviews = vec![];
@@ -263,10 +368,14 @@
(mvs_and_base_tables, base_tables, mviews, setup_sql)
}

fn format_drop_mview(mview: &Table) -> String {
    format!("DROP MATERIALIZED VIEW IF EXISTS {}", mview.name)
}

/// Drops mview tables.
async fn drop_mview_table(mview: &Table, client: &tokio_postgres::Client) {
client
.simple_query(&format!("DROP MATERIALIZED VIEW IF EXISTS {}", mview.name))
.simple_query(&format_drop_mview(mview))
.await
.unwrap();
}
21 changes: 21 additions & 0 deletions src/tests/sqlsmith/tests/freeze/1/ddl.sql
@@ -0,0 +1,21 @@
CREATE TABLE supplier (s_suppkey INT, s_name CHARACTER VARYING, s_address CHARACTER VARYING, s_nationkey INT, s_phone CHARACTER VARYING, s_acctbal NUMERIC, s_comment CHARACTER VARYING, PRIMARY KEY (s_suppkey));
CREATE TABLE part (p_partkey INT, p_name CHARACTER VARYING, p_mfgr CHARACTER VARYING, p_brand CHARACTER VARYING, p_type CHARACTER VARYING, p_size INT, p_container CHARACTER VARYING, p_retailprice NUMERIC, p_comment CHARACTER VARYING, PRIMARY KEY (p_partkey));
CREATE TABLE partsupp (ps_partkey INT, ps_suppkey INT, ps_availqty INT, ps_supplycost NUMERIC, ps_comment CHARACTER VARYING, PRIMARY KEY (ps_partkey, ps_suppkey));
CREATE TABLE customer (c_custkey INT, c_name CHARACTER VARYING, c_address CHARACTER VARYING, c_nationkey INT, c_phone CHARACTER VARYING, c_acctbal NUMERIC, c_mktsegment CHARACTER VARYING, c_comment CHARACTER VARYING, PRIMARY KEY (c_custkey));
CREATE TABLE orders (o_orderkey BIGINT, o_custkey INT, o_orderstatus CHARACTER VARYING, o_totalprice NUMERIC, o_orderdate DATE, o_orderpriority CHARACTER VARYING, o_clerk CHARACTER VARYING, o_shippriority INT, o_comment CHARACTER VARYING, PRIMARY KEY (o_orderkey));
CREATE TABLE lineitem (l_orderkey BIGINT, l_partkey INT, l_suppkey INT, l_linenumber INT, l_quantity NUMERIC, l_extendedprice NUMERIC, l_discount NUMERIC, l_tax NUMERIC, l_returnflag CHARACTER VARYING, l_linestatus CHARACTER VARYING, l_shipdate DATE, l_commitdate DATE, l_receiptdate DATE, l_shipinstruct CHARACTER VARYING, l_shipmode CHARACTER VARYING, l_comment CHARACTER VARYING, PRIMARY KEY (l_orderkey, l_linenumber));
CREATE TABLE nation (n_nationkey INT, n_name CHARACTER VARYING, n_regionkey INT, n_comment CHARACTER VARYING, PRIMARY KEY (n_nationkey));
CREATE TABLE region (r_regionkey INT, r_name CHARACTER VARYING, r_comment CHARACTER VARYING, PRIMARY KEY (r_regionkey));
CREATE TABLE person (id BIGINT, name CHARACTER VARYING, email_address CHARACTER VARYING, credit_card CHARACTER VARYING, city CHARACTER VARYING, state CHARACTER VARYING, date_time TIMESTAMP, extra CHARACTER VARYING, PRIMARY KEY (id));
CREATE TABLE auction (id BIGINT, item_name CHARACTER VARYING, description CHARACTER VARYING, initial_bid BIGINT, reserve BIGINT, date_time TIMESTAMP, expires TIMESTAMP, seller BIGINT, category BIGINT, extra CHARACTER VARYING, PRIMARY KEY (id));
CREATE TABLE bid (auction BIGINT, bidder BIGINT, price BIGINT, channel CHARACTER VARYING, url CHARACTER VARYING, date_time TIMESTAMP, extra CHARACTER VARYING);
CREATE TABLE alltypes1 (c1 BOOLEAN, c2 SMALLINT, c3 INT, c4 BIGINT, c5 REAL, c6 DOUBLE, c7 NUMERIC, c8 DATE, c9 CHARACTER VARYING, c10 TIME, c11 TIMESTAMP, c13 INTERVAL, c14 STRUCT<a INT>, c15 INT[], c16 CHARACTER VARYING[]);
CREATE TABLE alltypes2 (c1 BOOLEAN, c2 SMALLINT, c3 INT, c4 BIGINT, c5 REAL, c6 DOUBLE, c7 NUMERIC, c8 DATE, c9 CHARACTER VARYING, c10 TIME, c11 TIMESTAMP, c13 INTERVAL, c14 STRUCT<a INT>, c15 INT[], c16 CHARACTER VARYING[]);
CREATE MATERIALIZED VIEW m1 AS SELECT (TRIM(LEADING ('PCMqvE5FrE') FROM t_0.c9)) AS col_0, (ARRAY[(INT '981'), (INT '42'), (INT '738'), (INT '763')]) AS col_1, t_0.c2 AS col_2, (INTERVAL '3600') AS col_3 FROM alltypes1 AS t_0 JOIN region AS t_1 ON t_0.c9 = t_1.r_comment AND t_0.c1 GROUP BY t_0.c2, t_0.c8, t_0.c15, t_0.c9, t_0.c16, t_1.r_name, t_0.c5, t_0.c7, t_0.c6 HAVING true;
CREATE MATERIALIZED VIEW m2 AS SELECT t_0.c1 AS col_0, t_0.c1 AS col_1, t_0.c1 AS col_2, true AS col_3 FROM alltypes2 AS t_0 GROUP BY t_0.c1 HAVING ((832) > ((100) + (SMALLINT '746')));
CREATE MATERIALIZED VIEW m3 AS SELECT t_1.p_size AS col_0 FROM m1 AS t_0 LEFT JOIN part AS t_1 ON t_0.col_0 = t_1.p_type AND true GROUP BY t_1.p_type, t_0.col_3, t_1.p_size, t_1.p_brand, t_1.p_comment;
CREATE MATERIALIZED VIEW m4 AS WITH with_0 AS (SELECT t_1.l_shipmode AS col_0, 'kZrMWYVcBV' AS col_1 FROM lineitem AS t_1 LEFT JOIN customer AS t_2 ON t_1.l_shipmode = t_2.c_address GROUP BY t_1.l_quantity, t_1.l_returnflag, t_2.c_comment, t_1.l_partkey, t_1.l_shipmode HAVING (avg((INTERVAL '-900581')) = TIME '02:05:33')) SELECT (802) AS col_0 FROM with_0;
CREATE MATERIALIZED VIEW m5 AS SELECT (upper((TRIM(t_0.r_comment)))) AS col_0, (coalesce(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, t_0.r_regionkey)) AS col_1, t_0.r_comment AS col_2 FROM region AS t_0 RIGHT JOIN region AS t_1 ON t_0.r_comment = t_1.r_comment AND true WHERE true GROUP BY t_0.r_regionkey, t_0.r_comment;
CREATE MATERIALIZED VIEW m6 AS SELECT '2qpwrIfYuD' AS col_0 FROM (SELECT (TIMESTAMP '2022-01-02 01:05:34') AS col_0, (TRIM(('nUPadiPg61'))) AS col_1, (t_0.seller & t_0.seller) AS col_2, t_0.date_time AS col_3 FROM auction AS t_0 GROUP BY t_0.id, t_0.seller, t_0.date_time, t_0.item_name) AS sq_1 WHERE true GROUP BY sq_1.col_3 HAVING CAST((INT '681') AS BOOLEAN);
CREATE MATERIALIZED VIEW m7 AS WITH with_0 AS (SELECT (hop_1.id % (SMALLINT '512')) AS col_0 FROM hop(person, person.date_time, INTERVAL '86400', INTERVAL '5702400') AS hop_1 WHERE false GROUP BY hop_1.id) SELECT (584) AS col_0, ((REAL '613')) AS col_1 FROM with_0 WHERE true;
CREATE MATERIALIZED VIEW m8 AS SELECT (OVERLAY(sq_2.col_2 PLACING sq_2.col_2 FROM sq_2.col_1)) AS col_0, sq_2.col_2 AS col_1, max((REAL '0')) AS col_2 FROM (SELECT (INT '417') AS col_0, t_1.c3 AS col_1, t_1.c9 AS col_2 FROM customer AS t_0 JOIN alltypes2 AS t_1 ON t_0.c_phone = t_1.c9 WHERE t_1.c1 GROUP BY t_0.c_comment, t_1.c11, t_1.c3, t_0.c_custkey, t_1.c2, t_1.c9, t_0.c_mktsegment, t_1.c8, t_1.c16, t_1.c5, t_0.c_address) AS sq_2 WHERE false GROUP BY sq_2.col_2, sq_2.col_1;