diff --git a/Cargo.lock b/Cargo.lock index 2d53f017bc9a..41ef7bd9e80c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5994,6 +5994,7 @@ dependencies = [ "futures", "futures-async-stream", "itertools", + "lazy_static", "madsim-tokio", "madsim-tonic", "maplit", diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml index 420e1747e6ca..d6e38c831856 100644 --- a/src/frontend/Cargo.toml +++ b/src/frontend/Cargo.toml @@ -31,6 +31,7 @@ fixedbitset = "0.4.1" futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = "0.2" itertools = "0.10" +lazy_static = "1" maplit = "1" md5 = "0.7.0" num-integer = "0.1" diff --git a/src/frontend/planner_test/src/lib.rs b/src/frontend/planner_test/src/lib.rs index d619297dbdf9..3d80f1473ac9 100644 --- a/src/frontend/planner_test/src/lib.rs +++ b/src/frontend/planner_test/src/lib.rs @@ -502,7 +502,9 @@ impl TestCase { }; if self.optimized_logical_plan.is_some() || self.optimizer_error.is_some() { - let optimized_logical_plan = match logical_plan.gen_optimized_logical_plan() { + // TODO: separate `optimized_logical_plan` into `optimized_logical_plan_for_batch` and + // `optimized_logical_plan_for_stream` + let optimized_logical_plan = match logical_plan.gen_optimized_logical_plan_for_batch() { Ok(optimized_logical_plan) => optimized_logical_plan, Err(err) => { ret.optimizer_error = Some(err.to_string()); diff --git a/src/frontend/planner_test/tests/testdata/join.yaml b/src/frontend/planner_test/tests/testdata/join.yaml index bee2b0fbdde0..c5614ee042f8 100644 --- a/src/frontend/planner_test/tests/testdata/join.yaml +++ b/src/frontend/planner_test/tests/testdata/join.yaml @@ -200,14 +200,14 @@ └─StreamProject { exprs: [Coalesce(i.x, i.x) as $expr1, i.t._row_id, i.t._row_id, i.x, i.x, i.t._row_id, i.t._row_id, i.x, i.x] } └─StreamHashJoin { type: FullOuter, predicate: i.x = i.x, output: [i.x, i.x, i.t._row_id, i.t._row_id, i.x, i.t._row_id, i.t._row_id, i.x] } ├─StreamProject { exprs: [i.x, i.t._row_id, i.t._row_id, i.x] } - | └─StreamShare { id = 513 } + | └─StreamShare { id = 503 } | └─StreamHashJoin { type: Inner, predicate: i.x = i.x, output: [i.x, i.t._row_id, i.t._row_id, i.x] } | ├─StreamExchange { dist: HashShard(i.x) } | | └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) } | └─StreamExchange { dist: HashShard(i.x) } | └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) } └─StreamProject { exprs: [i.x, i.t._row_id, i.t._row_id, i.x] } - └─StreamShare { id = 513 } + └─StreamShare { id = 503 } └─StreamHashJoin { type: Inner, predicate: i.x = i.x, output: [i.x, i.t._row_id, i.t._row_id, i.x] } ├─StreamExchange { dist: HashShard(i.x) } | └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) } @@ -503,7 +503,7 @@ └─BatchExchange { order: [], dist: HashShard(b.x) } └─BatchScan { table: b, columns: [b.x], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [y, z, $expr159(hidden), a._row_id(hidden), b._row_id(hidden), a.x(hidden), b.x(hidden)], pk_columns: [a._row_id, b._row_id, a.x, b.x], order_descs: [$expr159, a._row_id, b._row_id, a.x, b.x], pk_conflict: "no check" } + StreamMaterialize { columns: [y, z, $expr153(hidden), a._row_id(hidden), b._row_id(hidden), a.x(hidden), b.x(hidden)], pk_columns: [a._row_id, b._row_id, a.x, b.x], order_descs: [$expr153, a._row_id, b._row_id, a.x, b.x], pk_conflict: "no check" } └─StreamExchange { dist: HashShard(a._row_id, b._row_id, a.x, b.x) } └─StreamProject { exprs: [(2:Int32 * Coalesce(a.x, b.x)) as $expr1, (Coalesce(a.x, b.x) + Coalesce(a.x, b.x)) as $expr2, (Coalesce(a.x, b.x) + Coalesce(a.x, b.x)) as $expr3, a._row_id, b._row_id, a.x, b.x] } └─StreamFilter { predicate: ((2:Int32 * Coalesce(a.x, b.x)) < 10:Int32) } @@ -602,7 +602,7 @@ └─BatchExchange { order: [], dist: HashShard(t2.v2) } └─BatchScan { table: t2, columns: [t2.v2], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), $expr29(hidden), t2._row_id(hidden)], pk_columns: [t1._row_id, t2._row_id, $expr29, v2], pk_conflict: "no check" } + StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), $expr28(hidden), t2._row_id(hidden)], pk_columns: [t1._row_id, t2._row_id, $expr28, v2], pk_conflict: "no check" } └─StreamHashJoin { type: Inner, predicate: $expr1 IS NOT DISTINCT FROM t2.v2, output: [t1.v1, t2.v2, t1._row_id, $expr1, t2._row_id] } ├─StreamExchange { dist: HashShard($expr1) } | └─StreamProject { exprs: [t1.v1, t1.v1::Int64 as $expr1, t1._row_id] } diff --git a/src/frontend/planner_test/tests/testdata/nexmark.yaml b/src/frontend/planner_test/tests/testdata/nexmark.yaml index 595f6e25c59f..88291a39b91b 100644 --- a/src/frontend/planner_test/tests/testdata/nexmark.yaml +++ b/src/frontend/planner_test/tests/testdata/nexmark.yaml @@ -76,7 +76,7 @@ Fragment 0 StreamMaterialize { columns: [auction, bidder, price, date_time, bid._row_id(hidden)], pk_columns: [bid._row_id], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price) as $expr53, bid.date_time, bid._row_id] } + StreamProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price) as $expr51, bid.date_time, bid._row_id] } Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode @@ -201,7 +201,7 @@ Fragment 0 StreamMaterialize { columns: [category, avg], pk_columns: [category], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [auction.category, (sum(max(bid.price)) / count(max(bid.price))) as $expr54] } + StreamProject { exprs: [auction.category, (sum(max(bid.price)) / count(max(bid.price))) as $expr52] } StreamHashAgg { group_key: [auction.category], aggs: [count, sum(max(bid.price)), count(max(bid.price))] } result table: 0, state tables: [] StreamExchange Hash([1]) from 1 @@ -292,7 +292,7 @@ └─StreamHashJoin { type: Inner, predicate: window_start = window_start, output: all } ├─StreamExchange { dist: HashShard(window_start) } | └─StreamProject { exprs: [bid.auction, count, window_start] } - | └─StreamShare { id = 979 } + | └─StreamShare { id = 956 } | └─StreamProject { exprs: [bid.auction, window_start, count] } | └─StreamAppendOnlyHashAgg { group_key: [bid.auction, window_start], aggs: [count, count] } | └─StreamExchange { dist: HashShard(bid.auction, window_start) } @@ -303,7 +303,7 @@ └─StreamHashAgg { group_key: [window_start], aggs: [count, max(count)] } └─StreamExchange { dist: HashShard(window_start) } └─StreamProject { exprs: [bid.auction, window_start, count] } - └─StreamShare { id = 979 } + └─StreamShare { id = 956 } └─StreamProject { exprs: [bid.auction, window_start, count] } └─StreamAppendOnlyHashAgg { group_key: [bid.auction, window_start], aggs: [count, count] } └─StreamExchange { dist: HashShard(bid.auction, window_start) } @@ -406,7 +406,7 @@ └─BatchProject { exprs: [(TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr1, bid.price] } └─BatchScan { table: bid, columns: [bid.price, bid.date_time], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [auction, price, bidder, date_time, bid._row_id(hidden), $expr114(hidden), max(bid.price)(hidden)], pk_columns: [bid._row_id, $expr114, price, max(bid.price)], pk_conflict: "no check" } + StreamMaterialize { columns: [auction, price, bidder, date_time, bid._row_id(hidden), $expr110(hidden), max(bid.price)(hidden)], pk_columns: [bid._row_id, $expr110, price, max(bid.price)], pk_conflict: "no check" } └─StreamProject { exprs: [bid.auction, bid.price, bid.bidder, bid.date_time, bid._row_id, $expr1, max(bid.price)] } └─StreamFilter { predicate: (bid.date_time >= $expr2) AND (bid.date_time <= $expr1) } └─StreamHashJoin { type: Inner, predicate: bid.price = max(bid.price), output: all } @@ -420,10 +420,10 @@ └─StreamTableScan { table: bid, columns: [bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: | Fragment 0 - StreamMaterialize { columns: [auction, price, bidder, date_time, bid._row_id(hidden), $expr114(hidden), max(bid.price)(hidden)], pk_columns: [bid._row_id, $expr114, price, max(bid.price)], pk_conflict: "no check" } + StreamMaterialize { columns: [auction, price, bidder, date_time, bid._row_id(hidden), $expr110(hidden), max(bid.price)(hidden)], pk_columns: [bid._row_id, $expr110, price, max(bid.price)], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [bid.auction, bid.price, bid.bidder, bid.date_time, bid._row_id, $expr114, max(bid.price)] } - StreamFilter { predicate: (bid.date_time >= $expr115) AND (bid.date_time <= $expr114) } + StreamProject { exprs: [bid.auction, bid.price, bid.bidder, bid.date_time, bid._row_id, $expr110, max(bid.price)] } + StreamFilter { predicate: (bid.date_time >= $expr111) AND (bid.date_time <= $expr110) } StreamHashJoin { type: Inner, predicate: bid.price = max(bid.price), output: all } left table: 0, right table 2, left degree table: 1, right degree table: 3, StreamExchange Hash([2]) from 1 @@ -435,23 +435,23 @@ BatchPlanNode Fragment 2 - StreamProject { exprs: [max(bid.price), $expr114, ($expr114 - '00:00:10':Interval) as $expr115] } - StreamAppendOnlyHashAgg { group_key: [$expr114], aggs: [count, max(bid.price)] } + StreamProject { exprs: [max(bid.price), $expr110, ($expr110 - '00:00:10':Interval) as $expr111] } + StreamAppendOnlyHashAgg { group_key: [$expr110], aggs: [count, max(bid.price)] } result table: 4, state tables: [] StreamExchange Hash([0]) from 3 Fragment 3 - StreamProject { exprs: [(TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr114, bid.price, bid._row_id] } + StreamProject { exprs: [(TumbleStart(bid.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr110, bid.price, bid._row_id] } Chain { table: bid, columns: [bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode Table 0 { columns: [bid_auction, bid_bidder, bid_price, bid_date_time, bid__row_id], primary key: [$2 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [2] } Table 1 { columns: [bid_price, bid__row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 2 { columns: [max(bid_price), $expr114, $expr115], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 3 { columns: [max(bid_price), $expr114, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 4 { columns: [$expr114, count, max(bid_price)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } - Table 4294967294 { columns: [auction, price, bidder, date_time, bid._row_id, $expr114, max(bid.price)], primary key: [$4 ASC, $5 ASC, $1 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [1] } + Table 2 { columns: [max(bid_price), $expr110, $expr111], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 3 { columns: [max(bid_price), $expr110, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 4 { columns: [$expr110, count, max(bid_price)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 4294967294 { columns: [auction, price, bidder, date_time, bid._row_id, $expr110, max(bid.price)], primary key: [$4 ASC, $5 ASC, $1 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [1] } - id: nexmark_q8 before: - create_tables @@ -500,7 +500,7 @@ └─BatchProject { exprs: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr3, (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr4] } └─BatchScan { table: auction, columns: [auction.date_time, auction.seller], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [id, name, starttime, $expr218(hidden), auction.seller(hidden), $expr219(hidden), $expr220(hidden)], pk_columns: [id, name, starttime, $expr218, auction.seller, $expr219, $expr220], pk_conflict: "no check" } + StreamMaterialize { columns: [id, name, starttime, $expr210(hidden), auction.seller(hidden), $expr211(hidden), $expr212(hidden)], pk_columns: [id, name, starttime, $expr210, auction.seller, $expr211, $expr212], pk_conflict: "no check" } └─StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all } ├─StreamExchange { dist: HashShard(person.id, $expr1, $expr2) } | └─StreamProject { exprs: [person.id, person.name, $expr1, $expr2] } @@ -514,38 +514,38 @@ └─StreamTableScan { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) } stream_dist_plan: | Fragment 0 - StreamMaterialize { columns: [id, name, starttime, $expr218(hidden), auction.seller(hidden), $expr219(hidden), $expr220(hidden)], pk_columns: [id, name, starttime, $expr218, auction.seller, $expr219, $expr220], pk_conflict: "no check" } + StreamMaterialize { columns: [id, name, starttime, $expr210(hidden), auction.seller(hidden), $expr211(hidden), $expr212(hidden)], pk_columns: [id, name, starttime, $expr210, auction.seller, $expr211, $expr212], pk_conflict: "no check" } materialized table: 4294967294 - StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr217 = $expr219 AND $expr218 = $expr220, output: all } + StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr209 = $expr211 AND $expr210 = $expr212, output: all } left table: 0, right table 2, left degree table: 1, right degree table: 3, StreamExchange Hash([0, 2, 3]) from 1 - StreamProject { exprs: [auction.seller, $expr219, $expr220] } - StreamHashAgg { group_key: [auction.seller, $expr219, $expr220], aggs: [count] } + StreamProject { exprs: [auction.seller, $expr211, $expr212] } + StreamHashAgg { group_key: [auction.seller, $expr211, $expr212], aggs: [count] } result table: 5, state tables: [] StreamExchange Hash([0, 1, 2]) from 2 Fragment 1 - StreamProject { exprs: [person.id, person.name, $expr217, $expr218] } - StreamHashAgg { group_key: [person.id, person.name, $expr217, $expr218], aggs: [count] } + StreamProject { exprs: [person.id, person.name, $expr209, $expr210] } + StreamHashAgg { group_key: [person.id, person.name, $expr209, $expr210], aggs: [count] } result table: 4, state tables: [] - StreamProject { exprs: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval) as $expr217, (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr218] } + StreamProject { exprs: [person.id, person.name, TumbleStart(person.date_time, '00:00:10':Interval) as $expr209, (TumbleStart(person.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr210] } Chain { table: person, columns: [person.id, person.name, person.date_time], pk: [person.id], dist: UpstreamHashShard(person.id) } Upstream BatchPlanNode Fragment 2 - StreamProject { exprs: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr219, (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr220, auction.id] } + StreamProject { exprs: [auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr211, (TumbleStart(auction.date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr212, auction.id] } Chain { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) } Upstream BatchPlanNode - Table 0 { columns: [person_id, person_name, $expr217, $expr218], primary key: [$0 ASC, $2 ASC, $3 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [0, 2, 3] } - Table 1 { columns: [person_id, $expr217, $expr218, person_name, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1, 2] } - Table 2 { columns: [auction_seller, $expr219, $expr220], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0, 1, 2] } - Table 3 { columns: [auction_seller, $expr219, $expr220, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1, 2] } - Table 4 { columns: [person_id, person_name, $expr217, $expr218, count], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } - Table 5 { columns: [auction_seller, $expr219, $expr220, count], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1, 2] } - Table 4294967294 { columns: [id, name, starttime, $expr218, auction.seller, $expr219, $expr220], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [0, 2, 3] } + Table 0 { columns: [person_id, person_name, $expr209, $expr210], primary key: [$0 ASC, $2 ASC, $3 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [0, 2, 3] } + Table 1 { columns: [person_id, $expr209, $expr210, person_name, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1, 2] } + Table 2 { columns: [auction_seller, $expr211, $expr212], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0, 1, 2] } + Table 3 { columns: [auction_seller, $expr211, $expr212, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1, 2] } + Table 4 { columns: [person_id, person_name, $expr209, $expr210, count], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } + Table 5 { columns: [auction_seller, $expr211, $expr212, count], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1, 2] } + Table 4294967294 { columns: [id, name, starttime, $expr210, auction.seller, $expr211, $expr212], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [0, 2, 3] } - id: nexmark_q9 before: - create_tables @@ -640,7 +640,7 @@ Fragment 0 StreamMaterialize { columns: [auction, bidder, price, date_time, date, time, bid._row_id(hidden)], pk_columns: [bid._row_id], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar) as $expr105, ToChar(bid.date_time, 'HH:MI':Varchar) as $expr106, bid._row_id] } + StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar) as $expr101, ToChar(bid.date_time, 'HH:MI':Varchar) as $expr102, bid._row_id] } Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode @@ -729,7 +729,7 @@ Fragment 0 StreamMaterialize { columns: [auction, bidder, price, bidtimetype, date_time, extra, bid._row_id(hidden)], pk_columns: [bid._row_id], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price) as $expr105, Case(((Extract('HOUR':Varchar, bid.date_time) >= 8:Int32) AND (Extract('HOUR':Varchar, bid.date_time) <= 18:Int32)), 'dayTime':Varchar, ((Extract('HOUR':Varchar, bid.date_time) <= 6:Int32) OR (Extract('HOUR':Varchar, bid.date_time) >= 20:Int32)), 'nightTime':Varchar, 'otherTime':Varchar) as $expr106, bid.date_time, bid.extra, bid._row_id] } + StreamProject { exprs: [bid.auction, bid.bidder, (0.908:Decimal * bid.price) as $expr101, Case(((Extract('HOUR':Varchar, bid.date_time) >= 8:Int32) AND (Extract('HOUR':Varchar, bid.date_time) <= 18:Int32)), 'dayTime':Varchar, ((Extract('HOUR':Varchar, bid.date_time) <= 6:Int32) OR (Extract('HOUR':Varchar, bid.date_time) >= 20:Int32)), 'nightTime':Varchar, 'otherTime':Varchar) as $expr102, bid.date_time, bid.extra, bid._row_id] } StreamFilter { predicate: ((0.908:Decimal * bid.price) > 1000000:Int32) AND ((0.908:Decimal * bid.price) < 50000000:Int32) } Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid.extra, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream @@ -777,18 +777,18 @@ Fragment 0 StreamMaterialize { columns: [day, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], pk_columns: [day], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [$expr53, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count(distinct bid.bidder), count(distinct bid.bidder) filter((bid.price < 10000:Int32)), count(distinct bid.bidder) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.bidder) filter((bid.price >= 1000000:Int32)), count(distinct bid.auction), count(distinct bid.auction) filter((bid.price < 10000:Int32)), count(distinct bid.auction) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.auction) filter((bid.price >= 1000000:Int32))] } - StreamAppendOnlyHashAgg { group_key: [$expr53], aggs: [count, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count(distinct bid.bidder), count(distinct bid.bidder) filter((bid.price < 10000:Int32)), count(distinct bid.bidder) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.bidder) filter((bid.price >= 1000000:Int32)), count(distinct bid.auction), count(distinct bid.auction) filter((bid.price < 10000:Int32)), count(distinct bid.auction) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.auction) filter((bid.price >= 1000000:Int32))] } + StreamProject { exprs: [$expr51, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count(distinct bid.bidder), count(distinct bid.bidder) filter((bid.price < 10000:Int32)), count(distinct bid.bidder) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.bidder) filter((bid.price >= 1000000:Int32)), count(distinct bid.auction), count(distinct bid.auction) filter((bid.price < 10000:Int32)), count(distinct bid.auction) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.auction) filter((bid.price >= 1000000:Int32))] } + StreamAppendOnlyHashAgg { group_key: [$expr51], aggs: [count, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count(distinct bid.bidder), count(distinct bid.bidder) filter((bid.price < 10000:Int32)), count(distinct bid.bidder) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.bidder) filter((bid.price >= 1000000:Int32)), count(distinct bid.auction), count(distinct bid.auction) filter((bid.price < 10000:Int32)), count(distinct bid.auction) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.auction) filter((bid.price >= 1000000:Int32))] } result table: 0, state tables: [] StreamExchange Hash([0]) from 1 Fragment 1 - StreamProject { exprs: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar) as $expr53, bid.price, bid.bidder, bid.auction, bid._row_id] } + StreamProject { exprs: [ToChar(bid.date_time, 'yyyy-MM-dd':Varchar) as $expr51, bid.price, bid.bidder, bid.auction, bid._row_id] } Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode - Table 0 { columns: [$expr53, count, count_0, count filter((bid_price < 10000:Int32)), count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count filter((bid_price >= 1000000:Int32)), count(distinct bid_bidder), count(distinct bid_bidder) filter((bid_price < 10000:Int32)), count(distinct bid_bidder) filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count(distinct bid_bidder) filter((bid_price >= 1000000:Int32)), count(distinct bid_auction), count(distinct bid_auction) filter((bid_price < 10000:Int32)), count(distinct bid_auction) filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count(distinct bid_auction) filter((bid_price >= 1000000:Int32))], primary key: [$0 ASC], value indices: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [0] } + Table 0 { columns: [$expr51, count, count_0, count filter((bid_price < 10000:Int32)), count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count filter((bid_price >= 1000000:Int32)), count(distinct bid_bidder), count(distinct bid_bidder) filter((bid_price < 10000:Int32)), count(distinct bid_bidder) filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count(distinct bid_bidder) filter((bid_price >= 1000000:Int32)), count(distinct bid_auction), count(distinct bid_auction) filter((bid_price < 10000:Int32)), count(distinct bid_auction) filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count(distinct bid_auction) filter((bid_price >= 1000000:Int32))], primary key: [$0 ASC], value indices: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [0] } Table 4294967294 { columns: [day, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], primary key: [$0 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], distribution key: [0] } - id: nexmark_q16 before: @@ -833,18 +833,18 @@ Fragment 0 StreamMaterialize { columns: [channel, day, minute, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], pk_columns: [channel, day], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [bid.channel, $expr105, max($expr106), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count(distinct bid.bidder), count(distinct bid.bidder) filter((bid.price < 10000:Int32)), count(distinct bid.bidder) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.bidder) filter((bid.price >= 1000000:Int32)), count(distinct bid.auction), count(distinct bid.auction) filter((bid.price < 10000:Int32)), count(distinct bid.auction) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.auction) filter((bid.price >= 1000000:Int32))] } - StreamAppendOnlyHashAgg { group_key: [bid.channel, $expr105], aggs: [count, max($expr106), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count(distinct bid.bidder), count(distinct bid.bidder) filter((bid.price < 10000:Int32)), count(distinct bid.bidder) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.bidder) filter((bid.price >= 1000000:Int32)), count(distinct bid.auction), count(distinct bid.auction) filter((bid.price < 10000:Int32)), count(distinct bid.auction) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.auction) filter((bid.price >= 1000000:Int32))] } + StreamProject { exprs: [bid.channel, $expr101, max($expr102), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count(distinct bid.bidder), count(distinct bid.bidder) filter((bid.price < 10000:Int32)), count(distinct bid.bidder) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.bidder) filter((bid.price >= 1000000:Int32)), count(distinct bid.auction), count(distinct bid.auction) filter((bid.price < 10000:Int32)), count(distinct bid.auction) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.auction) filter((bid.price >= 1000000:Int32))] } + StreamAppendOnlyHashAgg { group_key: [bid.channel, $expr101], aggs: [count, max($expr102), count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), count(distinct bid.bidder), count(distinct bid.bidder) filter((bid.price < 10000:Int32)), count(distinct bid.bidder) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.bidder) filter((bid.price >= 1000000:Int32)), count(distinct bid.auction), count(distinct bid.auction) filter((bid.price < 10000:Int32)), count(distinct bid.auction) filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count(distinct bid.auction) filter((bid.price >= 1000000:Int32))] } result table: 0, state tables: [] StreamExchange Hash([0, 1]) from 1 Fragment 1 - StreamProject { exprs: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar) as $expr105, ToChar(bid.date_time, 'HH:mm':Varchar) as $expr106, bid.price, bid.bidder, bid.auction, bid._row_id] } + StreamProject { exprs: [bid.channel, ToChar(bid.date_time, 'yyyy-MM-dd':Varchar) as $expr101, ToChar(bid.date_time, 'HH:mm':Varchar) as $expr102, bid.price, bid.bidder, bid.auction, bid._row_id] } Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode - Table 0 { columns: [bid_channel, $expr105, count, max($expr106), count_0, count filter((bid_price < 10000:Int32)), count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count filter((bid_price >= 1000000:Int32)), count(distinct bid_bidder), count(distinct bid_bidder) filter((bid_price < 10000:Int32)), count(distinct bid_bidder) filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count(distinct bid_bidder) filter((bid_price >= 1000000:Int32)), count(distinct bid_auction), count(distinct bid_auction) filter((bid_price < 10000:Int32)), count(distinct bid_auction) filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count(distinct bid_auction) filter((bid_price >= 1000000:Int32))], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], distribution key: [0, 1] } + Table 0 { columns: [bid_channel, $expr101, count, max($expr102), count_0, count filter((bid_price < 10000:Int32)), count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count filter((bid_price >= 1000000:Int32)), count(distinct bid_bidder), count(distinct bid_bidder) filter((bid_price < 10000:Int32)), count(distinct bid_bidder) filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count(distinct bid_bidder) filter((bid_price >= 1000000:Int32)), count(distinct bid_auction), count(distinct bid_auction) filter((bid_price < 10000:Int32)), count(distinct bid_auction) filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count(distinct bid_auction) filter((bid_price >= 1000000:Int32))], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], distribution key: [0, 1] } Table 4294967294 { columns: [channel, day, minute, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], distribution key: [0, 1] } - id: nexmark_q17 before: @@ -881,18 +881,18 @@ Fragment 0 StreamMaterialize { columns: [auction, day, total_bids, rank1_bids, rank2_bids, rank3_bids, min_price, max_price, avg_price, sum_price], pk_columns: [auction, day], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [bid.auction, $expr106, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), (sum(bid.price) / count(bid.price)) as $expr107, sum(bid.price)] } - StreamAppendOnlyHashAgg { group_key: [bid.auction, $expr106], aggs: [count, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), sum(bid.price), count(bid.price)] } + StreamProject { exprs: [bid.auction, $expr102, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), (sum(bid.price) / count(bid.price)) as $expr103, sum(bid.price)] } + StreamAppendOnlyHashAgg { group_key: [bid.auction, $expr102], aggs: [count, count, count filter((bid.price < 10000:Int32)), count filter((bid.price >= 10000:Int32) AND (bid.price < 1000000:Int32)), count filter((bid.price >= 1000000:Int32)), min(bid.price), max(bid.price), sum(bid.price), count(bid.price)] } result table: 0, state tables: [] StreamExchange Hash([0, 1]) from 1 Fragment 1 - StreamProject { exprs: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar) as $expr106, bid.price, bid._row_id] } + StreamProject { exprs: [bid.auction, ToChar(bid.date_time, 'YYYY-MM-DD':Varchar) as $expr102, bid.price, bid._row_id] } Chain { table: bid, columns: [bid.auction, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode - Table 0 { columns: [bid_auction, $expr106, count, count_0, count filter((bid_price < 10000:Int32)), count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count filter((bid_price >= 1000000:Int32)), min(bid_price), max(bid_price), sum(bid_price), count(bid_price)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10], distribution key: [0, 1] } + Table 0 { columns: [bid_auction, $expr102, count, count_0, count filter((bid_price < 10000:Int32)), count filter((bid_price >= 10000:Int32) AND (bid_price < 1000000:Int32)), count filter((bid_price >= 1000000:Int32)), min(bid_price), max(bid_price), sum(bid_price), count(bid_price)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10], distribution key: [0, 1] } Table 4294967294 { columns: [auction, day, total_bids, rank1_bids, rank2_bids, rank3_bids, min_price, max_price, avg_price, sum_price], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], distribution key: [0, 1] } - id: nexmark_q18 before: @@ -1047,7 +1047,7 @@ Fragment 0 StreamMaterialize { columns: [auction, bidder, price, channel, dir1, dir2, dir3, bid._row_id(hidden)], pk_columns: [bid._row_id], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.channel, SplitPart(bid.url, '/':Varchar, 4:Int32) as $expr157, SplitPart(bid.url, '/':Varchar, 5:Int32) as $expr158, SplitPart(bid.url, '/':Varchar, 6:Int32) as $expr159, bid._row_id] } + StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.channel, SplitPart(bid.url, '/':Varchar, 4:Int32) as $expr151, SplitPart(bid.url, '/':Varchar, 5:Int32) as $expr152, SplitPart(bid.url, '/':Varchar, 6:Int32) as $expr153, bid._row_id] } Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } Upstream BatchPlanNode @@ -1173,7 +1173,7 @@ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], pk_columns: [auction_id, auction_item_name], pk_conflict: "no check" } materialized table: 4294967294 - StreamDynamicFilter { predicate: (count(bid.auction) >= $expr54), output: [auction.id, auction.item_name, count(bid.auction)] } + StreamDynamicFilter { predicate: (count(bid.auction) >= $expr52), output: [auction.id, auction.item_name, count(bid.auction)] } left table: 0, right table 1 StreamProject { exprs: [auction.id, auction.item_name, count(bid.auction)] } StreamHashAgg { group_key: [auction.id, auction.item_name], aggs: [count, count(bid.auction)] } @@ -1195,7 +1195,7 @@ BatchPlanNode Fragment 3 - StreamProject { exprs: [(sum0(count) / count(bid.auction)) as $expr54] } + StreamProject { exprs: [(sum0(count) / count(bid.auction)) as $expr52] } StreamGlobalSimpleAgg { aggs: [count, sum0(count), count(bid.auction)] } result table: 7, state tables: [] StreamExchange Single from 4 @@ -1212,7 +1212,7 @@ BatchPlanNode Table 0 { columns: [auction_id, auction_item_name, count(bid_auction)], primary key: [$2 ASC, $0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 1 { columns: [$expr54], primary key: [], value indices: [0], distribution key: [] } + Table 1 { columns: [$expr52], primary key: [], value indices: [0], distribution key: [] } Table 2 { columns: [auction_id, auction_item_name, count, count(bid_auction)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [0] } Table 3 { columns: [auction_id, auction_item_name], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } Table 4 { columns: [auction_id, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } diff --git a/src/frontend/planner_test/tests/testdata/nexmark_source.yaml b/src/frontend/planner_test/tests/testdata/nexmark_source.yaml index f2e34ac50fee..786e7d7c50fb 100644 --- a/src/frontend/planner_test/tests/testdata/nexmark_source.yaml +++ b/src/frontend/planner_test/tests/testdata/nexmark_source.yaml @@ -97,7 +97,7 @@ StreamExchange Hash([4]) from 1 Fragment 1 - StreamProject { exprs: [auction, bidder, (0.908:Decimal * price) as $expr55, date_time, _row_id] } + StreamProject { exprs: [auction, bidder, (0.908:Decimal * price) as $expr53, date_time, _row_id] } StreamRowIdGen { row_id_index: 7 } StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } source state table: 0 @@ -248,7 +248,7 @@ Fragment 0 StreamMaterialize { columns: [category, avg], pk_columns: [category], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [category, (sum(max(price))::Decimal / count(max(price))) as $expr54] } + StreamProject { exprs: [category, (sum(max(price))::Decimal / count(max(price))) as $expr52] } StreamHashAgg { group_key: [category], aggs: [count, sum(max(price)), count(max(price))] } result table: 0, state tables: [] StreamExchange Hash([1]) from 1 @@ -344,7 +344,7 @@ └─StreamHashJoin { type: Inner, predicate: window_start = window_start, output: all } ├─StreamExchange { dist: HashShard(window_start) } | └─StreamProject { exprs: [auction, count, window_start] } - | └─StreamShare { id = 1091 } + | └─StreamShare { id = 1064 } | └─StreamProject { exprs: [auction, window_start, count] } | └─StreamAppendOnlyHashAgg { group_key: [auction, window_start], aggs: [count, count] } | └─StreamExchange { dist: HashShard(auction, window_start) } @@ -357,7 +357,7 @@ └─StreamHashAgg { group_key: [window_start], aggs: [count, max(count)] } └─StreamExchange { dist: HashShard(window_start) } └─StreamProject { exprs: [auction, window_start, count] } - └─StreamShare { id = 1091 } + └─StreamShare { id = 1064 } └─StreamProject { exprs: [auction, window_start, count] } └─StreamAppendOnlyHashAgg { group_key: [auction, window_start], aggs: [count, count] } └─StreamExchange { dist: HashShard(auction, window_start) } @@ -465,13 +465,13 @@ └─BatchProject { exprs: [(TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr1, price] } └─BatchSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"], filter: (None, None) } stream_plan: | - StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr119(hidden), max(price)(hidden)], pk_columns: [_row_id, $expr119, price, max(price)], pk_conflict: "no check" } + StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr115(hidden), max(price)(hidden)], pk_columns: [_row_id, $expr115, price, max(price)], pk_conflict: "no check" } └─StreamProject { exprs: [auction, price, bidder, date_time, _row_id, $expr1, max(price)] } └─StreamFilter { predicate: (date_time >= $expr2) AND (date_time <= $expr1) } └─StreamHashJoin { type: Inner, predicate: price = max(price), output: all } ├─StreamExchange { dist: HashShard(price) } | └─StreamProject { exprs: [auction, bidder, price, date_time, _row_id] } - | └─StreamShare { id = 576 } + | └─StreamShare { id = 564 } | └─StreamProject { exprs: [auction, bidder, price, date_time, _row_id] } | └─StreamRowIdGen { row_id_index: 7 } | └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } @@ -480,16 +480,16 @@ └─StreamAppendOnlyHashAgg { group_key: [$expr1], aggs: [count, max(price)] } └─StreamExchange { dist: HashShard($expr1) } └─StreamProject { exprs: [(TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr1, price, _row_id] } - └─StreamShare { id = 576 } + └─StreamShare { id = 564 } └─StreamProject { exprs: [auction, bidder, price, date_time, _row_id] } └─StreamRowIdGen { row_id_index: 7 } └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } stream_dist_plan: | Fragment 0 - StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr119(hidden), max(price)(hidden)], pk_columns: [_row_id, $expr119, price, max(price)], pk_conflict: "no check" } + StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr115(hidden), max(price)(hidden)], pk_columns: [_row_id, $expr115, price, max(price)], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [auction, price, bidder, date_time, _row_id, $expr119, max(price)] } - StreamFilter { predicate: (date_time >= $expr120) AND (date_time <= $expr119) } + StreamProject { exprs: [auction, price, bidder, date_time, _row_id, $expr115, max(price)] } + StreamFilter { predicate: (date_time >= $expr116) AND (date_time <= $expr115) } StreamHashJoin { type: Inner, predicate: price = max(price), output: all } left table: 0, right table 2, left degree table: 1, right degree table: 3, StreamExchange Hash([2]) from 1 @@ -506,22 +506,22 @@ source state table: 4 Fragment 3 - StreamProject { exprs: [max(price), $expr119, ($expr119 - '00:00:10':Interval) as $expr120] } - StreamAppendOnlyHashAgg { group_key: [$expr119], aggs: [count, max(price)] } + StreamProject { exprs: [max(price), $expr115, ($expr115 - '00:00:10':Interval) as $expr116] } + StreamAppendOnlyHashAgg { group_key: [$expr115], aggs: [count, max(price)] } result table: 5, state tables: [] StreamExchange Hash([0]) from 4 Fragment 4 - StreamProject { exprs: [(TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr119, price, _row_id] } + StreamProject { exprs: [(TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr115, price, _row_id] } StreamExchange Hash([4]) from 2 Table 0 { columns: [auction, bidder, price, date_time, _row_id], primary key: [$2 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [2] } Table 1 { columns: [price, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } - Table 2 { columns: [max(price), $expr119, $expr120], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 3 { columns: [max(price), $expr119, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } + Table 2 { columns: [max(price), $expr115, $expr116], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } + Table 3 { columns: [max(price), $expr115, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 4 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } - Table 5 { columns: [$expr119, count, max(price)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } - Table 4294967294 { columns: [auction, price, bidder, date_time, _row_id, $expr119, max(price)], primary key: [$4 ASC, $5 ASC, $1 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [1] } + Table 5 { columns: [$expr115, count, max(price)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 4294967294 { columns: [auction, price, bidder, date_time, _row_id, $expr115, max(price)], primary key: [$4 ASC, $5 ASC, $1 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [1] } - id: nexmark_q8 before: - create_sources @@ -571,7 +571,7 @@ └─BatchProject { exprs: [seller, TumbleStart(date_time, '00:00:10':Interval) as $expr3, (TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr4] } └─BatchSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"], filter: (None, None) } stream_plan: | - StreamMaterialize { columns: [id, name, starttime, $expr226(hidden), seller(hidden), $expr227(hidden), $expr228(hidden)], pk_columns: [id, name, starttime, $expr226, seller, $expr227, $expr228], pk_conflict: "no check" } + StreamMaterialize { columns: [id, name, starttime, $expr218(hidden), seller(hidden), $expr219(hidden), $expr220(hidden)], pk_columns: [id, name, starttime, $expr218, seller, $expr219, $expr220], pk_conflict: "no check" } └─StreamHashJoin { type: Inner, predicate: id = seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all } ├─StreamExchange { dist: HashShard(id, $expr1, $expr2) } | └─StreamProject { exprs: [id, name, $expr1, $expr2] } @@ -588,43 +588,43 @@ └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } stream_dist_plan: | Fragment 0 - StreamMaterialize { columns: [id, name, starttime, $expr226(hidden), seller(hidden), $expr227(hidden), $expr228(hidden)], pk_columns: [id, name, starttime, $expr226, seller, $expr227, $expr228], pk_conflict: "no check" } + StreamMaterialize { columns: [id, name, starttime, $expr218(hidden), seller(hidden), $expr219(hidden), $expr220(hidden)], pk_columns: [id, name, starttime, $expr218, seller, $expr219, $expr220], pk_conflict: "no check" } materialized table: 4294967294 - StreamHashJoin { type: Inner, predicate: id = seller AND $expr225 = $expr227 AND $expr226 = $expr228, output: all } + StreamHashJoin { type: Inner, predicate: id = seller AND $expr217 = $expr219 AND $expr218 = $expr220, output: all } left table: 0, right table 2, left degree table: 1, right degree table: 3, StreamExchange Hash([0, 2, 3]) from 1 - StreamProject { exprs: [seller, $expr227, $expr228] } - StreamAppendOnlyHashAgg { group_key: [seller, $expr227, $expr228], aggs: [count] } + StreamProject { exprs: [seller, $expr219, $expr220] } + StreamAppendOnlyHashAgg { group_key: [seller, $expr219, $expr220], aggs: [count] } result table: 6, state tables: [] StreamExchange Hash([0, 1, 2]) from 3 Fragment 1 - StreamProject { exprs: [id, name, $expr225, $expr226] } - StreamAppendOnlyHashAgg { group_key: [id, name, $expr225, $expr226], aggs: [count] } + StreamProject { exprs: [id, name, $expr217, $expr218] } + StreamAppendOnlyHashAgg { group_key: [id, name, $expr217, $expr218], aggs: [count] } result table: 4, state tables: [] StreamExchange Hash([0, 1, 2, 3]) from 2 Fragment 2 - StreamProject { exprs: [id, name, TumbleStart(date_time, '00:00:10':Interval) as $expr225, (TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr226, _row_id] } + StreamProject { exprs: [id, name, TumbleStart(date_time, '00:00:10':Interval) as $expr217, (TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr218, _row_id] } StreamRowIdGen { row_id_index: 7 } StreamSource { source: "person", columns: ["id", "name", "email_address", "credit_card", "city", "state", "date_time", "_row_id"] } source state table: 5 Fragment 3 - StreamProject { exprs: [seller, TumbleStart(date_time, '00:00:10':Interval) as $expr227, (TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr228, _row_id] } + StreamProject { exprs: [seller, TumbleStart(date_time, '00:00:10':Interval) as $expr219, (TumbleStart(date_time, '00:00:10':Interval) + '00:00:10':Interval) as $expr220, _row_id] } StreamRowIdGen { row_id_index: 9 } StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } source state table: 7 - Table 0 { columns: [id, name, $expr225, $expr226], primary key: [$0 ASC, $2 ASC, $3 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [0, 2, 3] } - Table 1 { columns: [id, $expr225, $expr226, name, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1, 2] } - Table 2 { columns: [seller, $expr227, $expr228], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0, 1, 2] } - Table 3 { columns: [seller, $expr227, $expr228, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1, 2] } - Table 4 { columns: [id, name, $expr225, $expr226, count], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1, 2, 3] } + Table 0 { columns: [id, name, $expr217, $expr218], primary key: [$0 ASC, $2 ASC, $3 ASC, $1 ASC], value indices: [0, 1, 2, 3], distribution key: [0, 2, 3] } + Table 1 { columns: [id, $expr217, $expr218, name, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1, 2] } + Table 2 { columns: [seller, $expr219, $expr220], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0, 1, 2] } + Table 3 { columns: [seller, $expr219, $expr220, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1, 2] } + Table 4 { columns: [id, name, $expr217, $expr218, count], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0, 1, 2, 3] } Table 5 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } - Table 6 { columns: [seller, $expr227, $expr228, count], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1, 2] } + Table 6 { columns: [seller, $expr219, $expr220, count], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0, 1, 2] } Table 7 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } - Table 4294967294 { columns: [id, name, starttime, $expr226, seller, $expr227, $expr228], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [0, 2, 3] } + Table 4294967294 { columns: [id, name, starttime, $expr218, seller, $expr219, $expr220], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [0, 2, 3] } - id: nexmark_q9 before: - create_sources @@ -728,7 +728,7 @@ StreamExchange Hash([6]) from 1 Fragment 1 - StreamProject { exprs: [auction, bidder, price, date_time, ToChar(date_time, 'YYYY-MM-DD':Varchar) as $expr109, ToChar(date_time, 'HH:MI':Varchar) as $expr110, _row_id] } + StreamProject { exprs: [auction, bidder, price, date_time, ToChar(date_time, 'YYYY-MM-DD':Varchar) as $expr105, ToChar(date_time, 'HH:MI':Varchar) as $expr106, _row_id] } StreamRowIdGen { row_id_index: 7 } StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } source state table: 0 @@ -823,7 +823,7 @@ StreamExchange Hash([6]) from 1 Fragment 1 - StreamProject { exprs: [auction, bidder, (0.908:Decimal * price) as $expr109, Case(((Extract('HOUR':Varchar, date_time) >= 8:Int32) AND (Extract('HOUR':Varchar, date_time) <= 18:Int32)), 'dayTime':Varchar, ((Extract('HOUR':Varchar, date_time) <= 6:Int32) OR (Extract('HOUR':Varchar, date_time) >= 20:Int32)), 'nightTime':Varchar, 'otherTime':Varchar) as $expr110, date_time, extra, _row_id] } + StreamProject { exprs: [auction, bidder, (0.908:Decimal * price) as $expr105, Case(((Extract('HOUR':Varchar, date_time) >= 8:Int32) AND (Extract('HOUR':Varchar, date_time) <= 18:Int32)), 'dayTime':Varchar, ((Extract('HOUR':Varchar, date_time) <= 6:Int32) OR (Extract('HOUR':Varchar, date_time) >= 20:Int32)), 'nightTime':Varchar, 'otherTime':Varchar) as $expr106, date_time, extra, _row_id] } StreamFilter { predicate: ((0.908:Decimal * price) > 1000000:Int32) AND ((0.908:Decimal * price) < 50000000:Int32) } StreamRowIdGen { row_id_index: 7 } StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } @@ -873,18 +873,18 @@ Fragment 0 StreamMaterialize { columns: [day, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], pk_columns: [day], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [$expr55, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count(distinct bidder), count(distinct bidder) filter((price < 10000:Int32)), count(distinct bidder) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct bidder) filter((price >= 1000000:Int32)), count(distinct auction), count(distinct auction) filter((price < 10000:Int32)), count(distinct auction) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct auction) filter((price >= 1000000:Int32))] } - StreamAppendOnlyHashAgg { group_key: [$expr55], aggs: [count, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count(distinct bidder), count(distinct bidder) filter((price < 10000:Int32)), count(distinct bidder) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct bidder) filter((price >= 1000000:Int32)), count(distinct auction), count(distinct auction) filter((price < 10000:Int32)), count(distinct auction) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct auction) filter((price >= 1000000:Int32))] } + StreamProject { exprs: [$expr53, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count(distinct bidder), count(distinct bidder) filter((price < 10000:Int32)), count(distinct bidder) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct bidder) filter((price >= 1000000:Int32)), count(distinct auction), count(distinct auction) filter((price < 10000:Int32)), count(distinct auction) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct auction) filter((price >= 1000000:Int32))] } + StreamAppendOnlyHashAgg { group_key: [$expr53], aggs: [count, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count(distinct bidder), count(distinct bidder) filter((price < 10000:Int32)), count(distinct bidder) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct bidder) filter((price >= 1000000:Int32)), count(distinct auction), count(distinct auction) filter((price < 10000:Int32)), count(distinct auction) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct auction) filter((price >= 1000000:Int32))] } result table: 0, state tables: [] StreamExchange Hash([0]) from 1 Fragment 1 - StreamProject { exprs: [ToChar(date_time, 'yyyy-MM-dd':Varchar) as $expr55, price, bidder, auction, _row_id] } + StreamProject { exprs: [ToChar(date_time, 'yyyy-MM-dd':Varchar) as $expr53, price, bidder, auction, _row_id] } StreamRowIdGen { row_id_index: 7 } StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } source state table: 3 - Table 0 { columns: [$expr55, count, count_0, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count(distinct bidder), count(distinct bidder) filter((price < 10000:Int32)), count(distinct bidder) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct bidder) filter((price >= 1000000:Int32)), count(distinct auction), count(distinct auction) filter((price < 10000:Int32)), count(distinct auction) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct auction) filter((price >= 1000000:Int32))], primary key: [$0 ASC], value indices: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [0] } + Table 0 { columns: [$expr53, count, count_0, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count(distinct bidder), count(distinct bidder) filter((price < 10000:Int32)), count(distinct bidder) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct bidder) filter((price >= 1000000:Int32)), count(distinct auction), count(distinct auction) filter((price < 10000:Int32)), count(distinct auction) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct auction) filter((price >= 1000000:Int32))], primary key: [$0 ASC], value indices: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [0] } Table 3 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } Table 4294967294 { columns: [day, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], primary key: [$0 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], distribution key: [0] } - id: nexmark_q16 @@ -931,18 +931,18 @@ Fragment 0 StreamMaterialize { columns: [channel, day, minute, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], pk_columns: [channel, day], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [channel, $expr109, max($expr110), count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count(distinct bidder), count(distinct bidder) filter((price < 10000:Int32)), count(distinct bidder) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct bidder) filter((price >= 1000000:Int32)), count(distinct auction), count(distinct auction) filter((price < 10000:Int32)), count(distinct auction) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct auction) filter((price >= 1000000:Int32))] } - StreamAppendOnlyHashAgg { group_key: [channel, $expr109], aggs: [count, max($expr110), count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count(distinct bidder), count(distinct bidder) filter((price < 10000:Int32)), count(distinct bidder) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct bidder) filter((price >= 1000000:Int32)), count(distinct auction), count(distinct auction) filter((price < 10000:Int32)), count(distinct auction) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct auction) filter((price >= 1000000:Int32))] } + StreamProject { exprs: [channel, $expr105, max($expr106), count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count(distinct bidder), count(distinct bidder) filter((price < 10000:Int32)), count(distinct bidder) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct bidder) filter((price >= 1000000:Int32)), count(distinct auction), count(distinct auction) filter((price < 10000:Int32)), count(distinct auction) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct auction) filter((price >= 1000000:Int32))] } + StreamAppendOnlyHashAgg { group_key: [channel, $expr105], aggs: [count, max($expr106), count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count(distinct bidder), count(distinct bidder) filter((price < 10000:Int32)), count(distinct bidder) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct bidder) filter((price >= 1000000:Int32)), count(distinct auction), count(distinct auction) filter((price < 10000:Int32)), count(distinct auction) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct auction) filter((price >= 1000000:Int32))] } result table: 0, state tables: [] StreamExchange Hash([0, 1]) from 1 Fragment 1 - StreamProject { exprs: [channel, ToChar(date_time, 'yyyy-MM-dd':Varchar) as $expr109, ToChar(date_time, 'HH:mm':Varchar) as $expr110, price, bidder, auction, _row_id] } + StreamProject { exprs: [channel, ToChar(date_time, 'yyyy-MM-dd':Varchar) as $expr105, ToChar(date_time, 'HH:mm':Varchar) as $expr106, price, bidder, auction, _row_id] } StreamRowIdGen { row_id_index: 7 } StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } source state table: 3 - Table 0 { columns: [channel, $expr109, count, max($expr110), count_0, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count(distinct bidder), count(distinct bidder) filter((price < 10000:Int32)), count(distinct bidder) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct bidder) filter((price >= 1000000:Int32)), count(distinct auction), count(distinct auction) filter((price < 10000:Int32)), count(distinct auction) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct auction) filter((price >= 1000000:Int32))], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], distribution key: [0, 1] } + Table 0 { columns: [channel, $expr105, count, max($expr106), count_0, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), count(distinct bidder), count(distinct bidder) filter((price < 10000:Int32)), count(distinct bidder) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct bidder) filter((price >= 1000000:Int32)), count(distinct auction), count(distinct auction) filter((price < 10000:Int32)), count(distinct auction) filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count(distinct auction) filter((price >= 1000000:Int32))], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], distribution key: [0, 1] } Table 3 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } Table 4294967294 { columns: [channel, day, minute, total_bids, rank1_bids, rank2_bids, rank3_bids, total_bidders, rank1_bidders, rank2_bidders, rank3_bidders, total_auctions, rank1_auctions, rank2_auctions, rank3_auctions], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], distribution key: [0, 1] } - id: nexmark_q17 @@ -981,18 +981,18 @@ Fragment 0 StreamMaterialize { columns: [auction, day, total_bids, rank1_bids, rank2_bids, rank3_bids, min_price, max_price, avg_price, sum_price], pk_columns: [auction, day], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [auction, $expr108, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), min(price), max(price), (sum(price)::Decimal / count(price)) as $expr109, sum(price)] } - StreamAppendOnlyHashAgg { group_key: [auction, $expr108], aggs: [count, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), min(price), max(price), sum(price), count(price)] } + StreamProject { exprs: [auction, $expr104, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), min(price), max(price), (sum(price)::Decimal / count(price)) as $expr105, sum(price)] } + StreamAppendOnlyHashAgg { group_key: [auction, $expr104], aggs: [count, count, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), min(price), max(price), sum(price), count(price)] } result table: 0, state tables: [] StreamExchange Hash([0, 1]) from 1 Fragment 1 - StreamProject { exprs: [auction, ToChar(date_time, 'YYYY-MM-DD':Varchar) as $expr108, price, _row_id] } + StreamProject { exprs: [auction, ToChar(date_time, 'YYYY-MM-DD':Varchar) as $expr104, price, _row_id] } StreamRowIdGen { row_id_index: 7 } StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } source state table: 1 - Table 0 { columns: [auction, $expr108, count, count_0, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), min(price), max(price), sum(price), count(price)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10], distribution key: [0, 1] } + Table 0 { columns: [auction, $expr104, count, count_0, count filter((price < 10000:Int32)), count filter((price >= 10000:Int32) AND (price < 1000000:Int32)), count filter((price >= 1000000:Int32)), min(price), max(price), sum(price), count(price)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10], distribution key: [0, 1] } Table 1 { columns: [partition_id, offset], primary key: [$0 ASC], value indices: [0, 1], distribution key: [] } Table 4294967294 { columns: [auction, day, total_bids, rank1_bids, rank2_bids, rank3_bids, min_price, max_price, avg_price, sum_price], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], distribution key: [0, 1] } - id: nexmark_q18 @@ -1168,7 +1168,7 @@ StreamExchange Hash([7]) from 1 Fragment 1 - StreamProject { exprs: [auction, bidder, price, channel, SplitPart(url, '/':Varchar, 4:Int32) as $expr163, SplitPart(url, '/':Varchar, 5:Int32) as $expr164, SplitPart(url, '/':Varchar, 6:Int32) as $expr165, _row_id] } + StreamProject { exprs: [auction, bidder, price, channel, SplitPart(url, '/':Varchar, 4:Int32) as $expr157, SplitPart(url, '/':Varchar, 5:Int32) as $expr158, SplitPart(url, '/':Varchar, 6:Int32) as $expr159, _row_id] } StreamRowIdGen { row_id_index: 7 } StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } source state table: 0 @@ -1294,7 +1294,7 @@ | | └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "_row_id"] } | └─StreamExchange { dist: HashShard(auction) } | └─StreamProject { exprs: [auction, _row_id] } - | └─StreamShare { id = 784 } + | └─StreamShare { id = 767 } | └─StreamProject { exprs: [auction, _row_id] } | └─StreamRowIdGen { row_id_index: 7 } | └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } @@ -1306,7 +1306,7 @@ └─StreamAppendOnlyHashAgg { group_key: [auction], aggs: [count, count] } └─StreamExchange { dist: HashShard(auction) } └─StreamProject { exprs: [auction, _row_id] } - └─StreamShare { id = 784 } + └─StreamShare { id = 767 } └─StreamProject { exprs: [auction, _row_id] } └─StreamRowIdGen { row_id_index: 7 } └─StreamSource { source: "bid", columns: ["auction", "bidder", "price", "channel", "url", "date_time", "extra", "_row_id"] } @@ -1314,7 +1314,7 @@ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], pk_columns: [auction_id, auction_item_name], pk_conflict: "no check" } materialized table: 4294967294 - StreamDynamicFilter { predicate: (count(auction) >= $expr55), output: [id, item_name, count(auction)] } + StreamDynamicFilter { predicate: (count(auction) >= $expr53), output: [id, item_name, count(auction)] } left table: 0, right table 1 StreamProject { exprs: [id, item_name, count(auction)] } StreamAppendOnlyHashAgg { group_key: [id, item_name], aggs: [count, count(auction)] } @@ -1342,7 +1342,7 @@ source state table: 8 Fragment 4 - StreamProject { exprs: [(sum0(count) / count(auction)) as $expr55] } + StreamProject { exprs: [(sum0(count) / count(auction)) as $expr53] } StreamGlobalSimpleAgg { aggs: [count, sum0(count), count(auction)] } result table: 9, state tables: [] StreamExchange Single from 5 @@ -1358,7 +1358,7 @@ StreamExchange Hash([1]) from 3 Table 0 { columns: [id, item_name, count(auction)], primary key: [$2 ASC, $0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } - Table 1 { columns: [$expr55], primary key: [], value indices: [0], distribution key: [] } + Table 1 { columns: [$expr53], primary key: [], value indices: [0], distribution key: [] } Table 2 { columns: [id, item_name, count, count(auction)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [0] } Table 3 { columns: [id, item_name, _row_id], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } Table 4 { columns: [id, _row_id, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } diff --git a/src/frontend/planner_test/tests/testdata/order_by.yaml b/src/frontend/planner_test/tests/testdata/order_by.yaml index b09c1947a54d..e8f42e306cac 100644 --- a/src/frontend/planner_test/tests/testdata/order_by.yaml +++ b/src/frontend/planner_test/tests/testdata/order_by.yaml @@ -133,7 +133,7 @@ └─BatchProject { exprs: [t.x, t.y, (t.x + t.y) as $expr1, t.z] } └─BatchScan { table: t, columns: [t.x, t.y, t.z], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [x, y, $expr72(hidden), t.z(hidden), t._row_id(hidden)], pk_columns: [t._row_id], order_descs: [$expr72, t.z, t._row_id], pk_conflict: "no check" } + StreamMaterialize { columns: [x, y, $expr71(hidden), t.z(hidden), t._row_id(hidden)], pk_columns: [t._row_id], order_descs: [$expr71, t.z, t._row_id], pk_conflict: "no check" } └─StreamProject { exprs: [t.x, t.y, (t.x + t.y) as $expr1, t.z, t._row_id] } └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: order by the number of an output column diff --git a/src/frontend/planner_test/tests/testdata/pk_derive.yaml b/src/frontend/planner_test/tests/testdata/pk_derive.yaml index ddadb2150e78..f824828f613f 100644 --- a/src/frontend/planner_test/tests/testdata/pk_derive.yaml +++ b/src/frontend/planner_test/tests/testdata/pk_derive.yaml @@ -56,13 +56,13 @@ | └─StreamHashAgg { group_key: [t.id], aggs: [count, max(t.v)] } | └─StreamExchange { dist: HashShard(t.id) } | └─StreamProject { exprs: [t.id, t.v, t._row_id] } - | └─StreamShare { id = 333 } + | └─StreamShare { id = 325 } | └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } └─StreamProject { exprs: [min(t.v), t.id] } └─StreamHashAgg { group_key: [t.id], aggs: [count, min(t.v)] } └─StreamExchange { dist: HashShard(t.id) } └─StreamProject { exprs: [t.id, t.v, t._row_id] } - └─StreamShare { id = 333 } + └─StreamShare { id = 325 } └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t (v1 varchar, v2 varchar, v3 varchar); diff --git a/src/frontend/planner_test/tests/testdata/project_set.yaml b/src/frontend/planner_test/tests/testdata/project_set.yaml index f54a162e6276..2056cc84bbf9 100644 --- a/src/frontend/planner_test/tests/testdata/project_set.yaml +++ b/src/frontend/planner_test/tests/testdata/project_set.yaml @@ -157,13 +157,13 @@ └─StreamHashJoin { type: Inner, predicate: Unnest($0) = Unnest($0), output: [Unnest($0), t._row_id, projected_row_id, t._row_id, projected_row_id, Unnest($0)] } ├─StreamExchange { dist: HashShard(Unnest($0)) } | └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] } - | └─StreamShare { id = 488 } + | └─StreamShare { id = 477 } | └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] } | └─StreamProjectSet { select_list: [Unnest($0), $1] } | └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } └─StreamExchange { dist: HashShard(Unnest($0)) } └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] } - └─StreamShare { id = 488 } + └─StreamShare { id = 477 } └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] } └─StreamProjectSet { select_list: [Unnest($0), $1] } └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/share.yaml b/src/frontend/planner_test/tests/testdata/share.yaml index 3e8e3b40d370..bccf8da8450a 100644 --- a/src/frontend/planner_test/tests/testdata/share.yaml +++ b/src/frontend/planner_test/tests/testdata/share.yaml @@ -44,7 +44,7 @@ | └─StreamProject { exprs: [id, _row_id] } | └─StreamFilter { predicate: (initial_bid = 1:Int32) } | └─StreamProject { exprs: [id, initial_bid, _row_id] } - | └─StreamShare { id = 661 } + | └─StreamShare { id = 650 } | └─StreamProject { exprs: [id, initial_bid, _row_id] } | └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) } | └─StreamRowIdGen { row_id_index: 10 } @@ -53,7 +53,7 @@ └─StreamProject { exprs: [id, _row_id] } └─StreamFilter { predicate: (initial_bid = 2:Int32) } └─StreamProject { exprs: [id, initial_bid, _row_id] } - └─StreamShare { id = 661 } + └─StreamShare { id = 650 } └─StreamProject { exprs: [id, initial_bid, _row_id] } └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) } └─StreamRowIdGen { row_id_index: 10 } @@ -117,7 +117,7 @@ └─StreamHashJoin { type: Inner, predicate: window_start = window_start, output: all } ├─StreamExchange { dist: HashShard(window_start) } | └─StreamProject { exprs: [auction, count, window_start] } - | └─StreamShare { id = 1091 } + | └─StreamShare { id = 1064 } | └─StreamProject { exprs: [auction, window_start, count] } | └─StreamAppendOnlyHashAgg { group_key: [auction, window_start], aggs: [count, count] } | └─StreamExchange { dist: HashShard(auction, window_start) } @@ -130,7 +130,7 @@ └─StreamHashAgg { group_key: [window_start], aggs: [count, max(count)] } └─StreamExchange { dist: HashShard(window_start) } └─StreamProject { exprs: [auction, window_start, count] } - └─StreamShare { id = 1091 } + └─StreamShare { id = 1064 } └─StreamProject { exprs: [auction, window_start, count] } └─StreamAppendOnlyHashAgg { group_key: [auction, window_start], aggs: [count, count] } └─StreamExchange { dist: HashShard(auction, window_start) } @@ -148,7 +148,7 @@ └─StreamUnion { all: true } ├─StreamExchange { dist: HashShard(0:Int32) } | └─StreamProject { exprs: [sum0(count), 0:Int32] } - | └─StreamShare { id = 334 } + | └─StreamShare { id = 325 } | └─StreamProject { exprs: [sum0(count)] } | └─StreamGlobalSimpleAgg { aggs: [count, sum0(count)] } | └─StreamExchange { dist: Single } @@ -156,7 +156,7 @@ | └─StreamTableScan { table: t, columns: [t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } └─StreamExchange { dist: HashShard(1:Int32) } └─StreamProject { exprs: [sum0(count), 1:Int32] } - └─StreamShare { id = 334 } + └─StreamShare { id = 325 } └─StreamProject { exprs: [sum0(count)] } └─StreamGlobalSimpleAgg { aggs: [count, sum0(count)] } └─StreamExchange { dist: Single } @@ -196,13 +196,13 @@ └─StreamAppendOnlyHashJoin { type: Inner, predicate: id = id, output: [_row_id, id, _row_id, id] } ├─StreamExchange { dist: HashShard(id) } | └─StreamProject { exprs: [id, _row_id] } - | └─StreamShare { id = 251 } + | └─StreamShare { id = 247 } | └─StreamProject { exprs: [id, _row_id] } | └─StreamRowIdGen { row_id_index: 10 } | └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "extra", "_row_id"] } └─StreamExchange { dist: HashShard(id) } └─StreamProject { exprs: [id, _row_id] } - └─StreamShare { id = 251 } + └─StreamShare { id = 247 } └─StreamProject { exprs: [id, _row_id] } └─StreamRowIdGen { row_id_index: 10 } └─StreamSource { source: "auction", columns: ["id", "item_name", "description", "initial_bid", "reserve", "date_time", "expires", "seller", "category", "extra", "_row_id"] } diff --git a/src/frontend/planner_test/tests/testdata/shared_views.yaml b/src/frontend/planner_test/tests/testdata/shared_views.yaml index 4991aadf23fd..3cb901d658f3 100644 --- a/src/frontend/planner_test/tests/testdata/shared_views.yaml +++ b/src/frontend/planner_test/tests/testdata/shared_views.yaml @@ -22,11 +22,11 @@ └─LogicalFilter { predicate: (t1.y > 0:Int32) } └─LogicalScan { table: t1, columns: [t1.x, t1.y, t1._row_id] } stream_plan: | - StreamMaterialize { columns: [z, a, b, t1._row_id(hidden), t1._row_id#1(hidden), t1._row_id#2(hidden), t1.x(hidden), $expr93(hidden)], pk_columns: [t1._row_id, t1._row_id#1, t1._row_id#2, t1.x, $expr93, z, a], pk_conflict: "no check" } + StreamMaterialize { columns: [z, a, b, t1._row_id(hidden), t1._row_id#1(hidden), t1._row_id#2(hidden), t1.x(hidden), $expr89(hidden)], pk_columns: [t1._row_id, t1._row_id#1, t1._row_id#2, t1.x, $expr89, z, a], pk_conflict: "no check" } └─StreamHashJoin { type: Inner, predicate: $expr1 = $expr2, output: [$expr1, $expr2, $expr3, t1._row_id, t1._row_id, t1._row_id, t1.x, $expr1] } ├─StreamExchange { dist: HashShard($expr1) } | └─StreamProject { exprs: [$expr1, t1._row_id] } - | └─StreamShare { id = 365 } + | └─StreamShare { id = 355 } | └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] } | └─StreamFilter { predicate: (t1.y > 0:Int32) } | └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } @@ -37,7 +37,7 @@ | └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } └─StreamExchange { dist: HashShard($expr1) } └─StreamProject { exprs: [$expr1, t1._row_id] } - └─StreamShare { id = 365 } + └─StreamShare { id = 355 } └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] } └─StreamFilter { predicate: (t1.y > 0:Int32) } └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/subquery.yaml b/src/frontend/planner_test/tests/testdata/subquery.yaml index a5409528c62a..51f73560fb2e 100644 --- a/src/frontend/planner_test/tests/testdata/subquery.yaml +++ b/src/frontend/planner_test/tests/testdata/subquery.yaml @@ -218,16 +218,14 @@ └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } optimized_logical_plan: | LogicalJoin { type: LeftSemi, on: IsNotDistinctFrom(auction.date_time, auction.date_time), output: all } - ├─LogicalShare { id = 248 } - | └─LogicalHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: all } - | └─LogicalScan { table: auction, columns: [auction.date_time], predicate: IsNotNull(auction.date_time) } + ├─LogicalHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: all } + | └─LogicalScan { table: auction, columns: [auction.date_time], predicate: IsNotNull(auction.date_time) } └─LogicalProject { exprs: [auction.date_time] } └─LogicalAgg { group_key: [auction.date_time, auction.date_time], aggs: [] } └─LogicalJoin { type: Inner, on: true, output: [auction.date_time, auction.date_time] } ├─LogicalAgg { group_key: [auction.date_time], aggs: [] } - | └─LogicalShare { id = 248 } - | └─LogicalHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: all } - | └─LogicalScan { table: auction, columns: [auction.date_time], predicate: IsNotNull(auction.date_time) } + | └─LogicalHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time] } + | └─LogicalScan { table: auction, columns: [auction.date_time], predicate: IsNotNull(auction.date_time) } └─LogicalValues { rows: [[]], schema: Schema { fields: [] } } batch_plan: | BatchExchange { order: [], dist: Single } @@ -243,7 +241,7 @@ └─BatchNestedLoopJoin { type: Inner, predicate: true, output: [auction.date_time, auction.date_time] } ├─BatchExchange { order: [], dist: Single } | └─BatchHashAgg { group_key: [auction.date_time], aggs: [] } - | └─BatchHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: all } + | └─BatchHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time] } | └─BatchExchange { order: [], dist: HashShard(auction.date_time) } | └─BatchFilter { predicate: IsNotNull(auction.date_time) } | └─BatchScan { table: auction, columns: [auction.date_time], distribution: SomeShard } diff --git a/src/frontend/planner_test/tests/testdata/tpch.yaml b/src/frontend/planner_test/tests/testdata/tpch.yaml index c68de5bf7a65..e13fe7575292 100644 --- a/src/frontend/planner_test/tests/testdata/tpch.yaml +++ b/src/frontend/planner_test/tests/testdata/tpch.yaml @@ -150,19 +150,19 @@ Fragment 0 StreamMaterialize { columns: [l_returnflag, l_linestatus, sum_qty, sum_base_price, sum_disc_price, sum_charge, avg_qty, avg_price, avg_disc, count_order], pk_columns: [l_returnflag, l_linestatus], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr363), sum($expr364), RoundDigit((sum(lineitem.l_quantity) / count(lineitem.l_quantity)), 4:Int32) as $expr365, RoundDigit((sum(lineitem.l_extendedprice) / count(lineitem.l_extendedprice)), 4:Int32) as $expr366, RoundDigit((sum(lineitem.l_discount) / count(lineitem.l_discount)), 4:Int32) as $expr367, count] } - StreamHashAgg { group_key: [lineitem.l_returnflag, lineitem.l_linestatus], aggs: [count, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr363), sum($expr364), count(lineitem.l_quantity), count(lineitem.l_extendedprice), sum(lineitem.l_discount), count(lineitem.l_discount), count] } + StreamProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr358), sum($expr359), RoundDigit((sum(lineitem.l_quantity) / count(lineitem.l_quantity)), 4:Int32) as $expr360, RoundDigit((sum(lineitem.l_extendedprice) / count(lineitem.l_extendedprice)), 4:Int32) as $expr361, RoundDigit((sum(lineitem.l_discount) / count(lineitem.l_discount)), 4:Int32) as $expr362, count] } + StreamHashAgg { group_key: [lineitem.l_returnflag, lineitem.l_linestatus], aggs: [count, sum(lineitem.l_quantity), sum(lineitem.l_extendedprice), sum($expr358), sum($expr359), count(lineitem.l_quantity), count(lineitem.l_extendedprice), sum(lineitem.l_discount), count(lineitem.l_discount), count] } result table: 0, state tables: [] StreamExchange Hash([0, 1]) from 1 Fragment 1 - StreamProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_quantity, lineitem.l_extendedprice, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr363, ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax)) as $expr364, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber] } + StreamProject { exprs: [lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_quantity, lineitem.l_extendedprice, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr358, ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) * (1:Int32 + lineitem.l_tax)) as $expr359, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber] } StreamFilter { predicate: (lineitem.l_shipdate <= '1998-09-21 00:00:00':Timestamp) } Chain { table: lineitem, columns: [lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_shipdate], pk: [lineitem.l_orderkey, lineitem.l_linenumber], dist: UpstreamHashShard(lineitem.l_orderkey, lineitem.l_linenumber) } Upstream BatchPlanNode - Table 0 { columns: [lineitem_l_returnflag, lineitem_l_linestatus, count, sum(lineitem_l_quantity), sum(lineitem_l_extendedprice), sum($expr363), sum($expr364), count(lineitem_l_quantity), count(lineitem_l_extendedprice), sum(lineitem_l_discount), count(lineitem_l_discount), count_0], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], distribution key: [0, 1] } + Table 0 { columns: [lineitem_l_returnflag, lineitem_l_linestatus, count, sum(lineitem_l_quantity), sum(lineitem_l_extendedprice), sum($expr358), sum($expr359), count(lineitem_l_quantity), count(lineitem_l_extendedprice), sum(lineitem_l_discount), count(lineitem_l_discount), count_0], primary key: [$0 ASC, $1 ASC], value indices: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11], distribution key: [0, 1] } Table 4294967294 { columns: [l_returnflag, l_linestatus, sum_qty, sum_base_price, sum_disc_price, sum_charge, avg_qty, avg_price, avg_disc, count_order], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], distribution key: [0, 1] } - id: tpch_q2 before: @@ -620,22 +620,22 @@ Fragment 0 StreamMaterialize { columns: [l_orderkey, revenue, o_orderdate, o_shippriority], pk_columns: [l_orderkey, o_orderdate, o_shippriority], order_descs: [revenue, o_orderdate, l_orderkey, o_shippriority], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [lineitem.l_orderkey, sum($expr74), orders.o_orderdate, orders.o_shippriority] } - StreamTopN { order: "[sum($expr74) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } + StreamProject { exprs: [lineitem.l_orderkey, sum($expr73), orders.o_orderdate, orders.o_shippriority] } + StreamTopN { order: "[sum($expr73) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0 } state table: 0 StreamExchange Single from 1 Fragment 1 - StreamGroupTopN { order: "[sum($expr74) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0, group_key: [4] } + StreamGroupTopN { order: "[sum($expr73) DESC, orders.o_orderdate ASC]", limit: 10, offset: 0, group_key: [4] } state table: 1 - StreamProject { exprs: [lineitem.l_orderkey, sum($expr74), orders.o_orderdate, orders.o_shippriority, Vnode(lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority) as $expr75] } - StreamProject { exprs: [lineitem.l_orderkey, sum($expr74), orders.o_orderdate, orders.o_shippriority] } - StreamHashAgg { group_key: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority], aggs: [count, sum($expr74)] } + StreamProject { exprs: [lineitem.l_orderkey, sum($expr73), orders.o_orderdate, orders.o_shippriority, Vnode(lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority) as $expr74] } + StreamProject { exprs: [lineitem.l_orderkey, sum($expr73), orders.o_orderdate, orders.o_shippriority] } + StreamHashAgg { group_key: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority], aggs: [count, sum($expr73)] } result table: 2, state tables: [] StreamExchange Hash([0, 1, 2]) from 2 Fragment 2 - StreamProject { exprs: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr74, customer.c_custkey, orders.o_orderkey, orders.o_custkey, lineitem.l_linenumber] } + StreamProject { exprs: [lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr73, customer.c_custkey, orders.o_orderkey, orders.o_custkey, lineitem.l_linenumber] } StreamHashJoin { type: Inner, predicate: orders.o_orderkey = lineitem.l_orderkey, output: [orders.o_orderdate, orders.o_shippriority, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, customer.c_custkey, orders.o_orderkey, orders.o_custkey, lineitem.l_linenumber] } left table: 3, right table 5, left degree table: 4, right degree table: 6, StreamExchange Hash([0]) from 3 @@ -667,9 +667,9 @@ Upstream BatchPlanNode - Table 0 { columns: [lineitem_l_orderkey, sum($expr74), orders_o_orderdate, orders_o_shippriority, $expr75], primary key: [$1 DESC, $2 ASC, $0 ASC, $3 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [] } - Table 1 { columns: [lineitem_l_orderkey, sum($expr74), orders_o_orderdate, orders_o_shippriority, $expr75], primary key: [$4 ASC, $1 DESC, $2 ASC, $0 ASC, $3 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0, 2, 3], vnode column idx: 4 } - Table 2 { columns: [lineitem_l_orderkey, orders_o_orderdate, orders_o_shippriority, count, sum($expr74)], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3, 4], distribution key: [0, 1, 2] } + Table 0 { columns: [lineitem_l_orderkey, sum($expr73), orders_o_orderdate, orders_o_shippriority, $expr74], primary key: [$1 DESC, $2 ASC, $0 ASC, $3 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [] } + Table 1 { columns: [lineitem_l_orderkey, sum($expr73), orders_o_orderdate, orders_o_shippriority, $expr74], primary key: [$4 ASC, $1 DESC, $2 ASC, $0 ASC, $3 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0, 2, 3], vnode column idx: 4 } + Table 2 { columns: [lineitem_l_orderkey, orders_o_orderdate, orders_o_shippriority, count, sum($expr73)], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3, 4], distribution key: [0, 1, 2] } Table 3 { columns: [orders_o_orderkey, orders_o_orderdate, orders_o_shippriority, customer_c_custkey, orders_o_custkey], primary key: [$0 ASC, $3 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0] } Table 4 { columns: [orders_o_orderkey, customer_c_custkey, orders_o_custkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } Table 5 { columns: [lineitem_l_orderkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_linenumber], primary key: [$0 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } @@ -900,13 +900,13 @@ Fragment 0 StreamMaterialize { columns: [n_name, revenue], pk_columns: [n_name], order_descs: [revenue, n_name], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [nation.n_name, sum($expr72)] } - StreamHashAgg { group_key: [nation.n_name], aggs: [count, sum($expr72)] } + StreamProject { exprs: [nation.n_name, sum($expr71)] } + StreamHashAgg { group_key: [nation.n_name], aggs: [count, sum($expr71)] } result table: 0, state tables: [] StreamExchange Hash([0]) from 1 Fragment 1 - StreamProject { exprs: [nation.n_name, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr72, customer.c_custkey, orders.o_orderkey, orders.o_custkey, supplier.s_suppkey, customer.c_nationkey, supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, region.r_regionkey, nation.n_regionkey] } + StreamProject { exprs: [nation.n_name, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr71, customer.c_custkey, orders.o_orderkey, orders.o_custkey, supplier.s_suppkey, customer.c_nationkey, supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, region.r_regionkey, nation.n_regionkey] } StreamHashJoin { type: Inner, predicate: nation.n_regionkey = region.r_regionkey, output: [lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, customer.c_custkey, orders.o_orderkey, orders.o_custkey, supplier.s_suppkey, customer.c_nationkey, supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, nation.n_regionkey, region.r_regionkey] } left table: 1, right table 3, left degree table: 2, right degree table: 4, StreamExchange Hash([3]) from 2 @@ -970,7 +970,7 @@ Upstream BatchPlanNode - Table 0 { columns: [nation_n_name, count, sum($expr72)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 0 { columns: [nation_n_name, count, sum($expr71)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } Table 1 { columns: [lineitem_l_extendedprice, lineitem_l_discount, nation_n_name, nation_n_regionkey, customer_c_custkey, orders_o_orderkey, orders_o_custkey, supplier_s_suppkey, customer_c_nationkey, supplier_s_nationkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey], primary key: [$3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [3] } Table 2 { columns: [nation_n_regionkey, customer_c_custkey, orders_o_orderkey, orders_o_custkey, supplier_s_suppkey, customer_c_nationkey, supplier_s_nationkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC], value indices: [11], distribution key: [0] } Table 3 { columns: [region_r_regionkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } @@ -1035,20 +1035,20 @@ Fragment 0 StreamMaterialize { columns: [revenue], pk_columns: [], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [sum(sum($expr74))] } - StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr74))] } + StreamProject { exprs: [sum(sum($expr73))] } + StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr73))] } result table: 0, state tables: [] StreamExchange Single from 1 Fragment 1 - StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr74)] } - StreamProject { exprs: [(lineitem.l_extendedprice * lineitem.l_discount) as $expr74, lineitem.l_orderkey, lineitem.l_linenumber] } + StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr73)] } + StreamProject { exprs: [(lineitem.l_extendedprice * lineitem.l_discount) as $expr73, lineitem.l_orderkey, lineitem.l_linenumber] } StreamFilter { predicate: (lineitem.l_shipdate >= '1994-01-01':Date) AND (lineitem.l_shipdate < '1995-01-01 00:00:00':Timestamp) AND (lineitem.l_discount >= 0.07:Decimal) AND (lineitem.l_discount <= 0.09:Decimal) AND (lineitem.l_quantity < 24:Int32) } Chain { table: lineitem, columns: [lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_quantity, lineitem.l_shipdate], pk: [lineitem.l_orderkey, lineitem.l_linenumber], dist: UpstreamHashShard(lineitem.l_orderkey, lineitem.l_linenumber) } Upstream BatchPlanNode - Table 0 { columns: [count, sum(sum($expr74))], primary key: [], value indices: [0, 1], distribution key: [] } + Table 0 { columns: [count, sum(sum($expr73))], primary key: [], value indices: [0, 1], distribution key: [] } Table 4294967294 { columns: [revenue], primary key: [], value indices: [0], distribution key: [] } - id: tpch_q7 before: @@ -1177,13 +1177,13 @@ Fragment 0 StreamMaterialize { columns: [supp_nation, cust_nation, l_year, revenue], pk_columns: [supp_nation, cust_nation, l_year], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [nation.n_name, nation.n_name, $expr151, sum($expr152)] } - StreamHashAgg { group_key: [nation.n_name, nation.n_name, $expr151], aggs: [count, sum($expr152)] } + StreamProject { exprs: [nation.n_name, nation.n_name, $expr149, sum($expr150)] } + StreamHashAgg { group_key: [nation.n_name, nation.n_name, $expr149], aggs: [count, sum($expr150)] } result table: 0, state tables: [] StreamExchange Hash([0, 1, 2]) from 1 Fragment 1 - StreamProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate) as $expr151, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr152, supplier.s_suppkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey] } + StreamProject { exprs: [nation.n_name, nation.n_name, Extract('YEAR':Varchar, lineitem.l_shipdate) as $expr149, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr150, supplier.s_suppkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey] } StreamFilter { predicate: (((nation.n_name = 'ROMANIA':Varchar) AND (nation.n_name = 'IRAN':Varchar)) OR ((nation.n_name = 'IRAN':Varchar) AND (nation.n_name = 'ROMANIA':Varchar))) } StreamHashJoin { type: Inner, predicate: customer.c_nationkey = nation.n_nationkey, output: all } left table: 1, right table 3, left degree table: 2, right degree table: 4, @@ -1245,7 +1245,7 @@ Upstream BatchPlanNode - Table 0 { columns: [nation_n_name, nation_n_name_0, $expr151, count, sum($expr152)], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3, 4], distribution key: [0, 1, 2] } + Table 0 { columns: [nation_n_name, nation_n_name_0, $expr149, count, sum($expr150)], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3, 4], distribution key: [0, 1, 2] } Table 1 { columns: [lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_shipdate, nation_n_name, customer_c_nationkey, supplier_s_suppkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, orders_o_custkey, customer_c_custkey], primary key: [$4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $13 ASC, $12 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], distribution key: [4] } Table 2 { columns: [customer_c_nationkey, supplier_s_suppkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, customer_c_custkey, orders_o_custkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC], value indices: [10], distribution key: [0] } Table 3 { columns: [nation_n_nationkey, nation_n_name], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } @@ -1418,13 +1418,13 @@ Fragment 0 StreamMaterialize { columns: [o_year, mkt_share], pk_columns: [o_year], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [$expr295, RoundDigit((sum($expr296) / sum($expr297)), 6:Int32) as $expr298] } - StreamHashAgg { group_key: [$expr295], aggs: [count, sum($expr296), sum($expr297)] } + StreamProject { exprs: [$expr291, RoundDigit((sum($expr292) / sum($expr293)), 6:Int32) as $expr294] } + StreamHashAgg { group_key: [$expr291], aggs: [count, sum($expr292), sum($expr293)] } result table: 0, state tables: [] StreamExchange Hash([0]) from 1 Fragment 1 - StreamProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate) as $expr295, Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Decimal) as $expr296, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr297, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, region.r_regionkey, nation.n_regionkey] } + StreamProject { exprs: [Extract('YEAR':Varchar, orders.o_orderdate) as $expr291, Case((nation.n_name = 'IRAN':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Decimal) as $expr292, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr293, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, region.r_regionkey, nation.n_regionkey] } StreamHashJoin { type: Inner, predicate: nation.n_regionkey = region.r_regionkey, output: [lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, nation.n_name, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, orders.o_orderkey, customer.c_custkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, nation.n_regionkey, region.r_regionkey] } left table: 1, right table 3, left degree table: 2, right degree table: 4, StreamExchange Hash([4]) from 2 @@ -1511,7 +1511,7 @@ Upstream BatchPlanNode - Table 0 { columns: [$expr295, count, sum($expr296), sum($expr297)], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [0] } + Table 0 { columns: [$expr291, count, sum($expr292), sum($expr293)], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [0] } Table 1 { columns: [lineitem_l_extendedprice, lineitem_l_discount, nation_n_name, orders_o_orderdate, nation_n_regionkey, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, customer_c_custkey, orders_o_custkey, customer_c_nationkey, nation_n_nationkey_0], primary key: [$4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC, $15 ASC, $17 ASC, $16 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], distribution key: [4] } Table 2 { columns: [nation_n_regionkey, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, orders_o_orderkey, customer_c_custkey, orders_o_custkey, nation_n_nationkey_0, customer_c_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC], value indices: [14], distribution key: [0] } Table 3 { columns: [region_r_regionkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } @@ -1664,13 +1664,13 @@ Fragment 0 StreamMaterialize { columns: [nation, o_year, sum_profit], pk_columns: [nation, o_year], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [nation.n_name, $expr221, RoundDigit(sum($expr222), 2:Int32) as $expr223] } - StreamHashAgg { group_key: [nation.n_name, $expr221], aggs: [count, sum($expr222)] } + StreamProject { exprs: [nation.n_name, $expr218, RoundDigit(sum($expr219), 2:Int32) as $expr220] } + StreamHashAgg { group_key: [nation.n_name, $expr218], aggs: [count, sum($expr219)] } result table: 0, state tables: [] StreamExchange Hash([0, 1]) from 1 Fragment 1 - StreamProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate) as $expr221, ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)) as $expr222, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, partsupp.ps_partkey, partsupp.ps_suppkey, orders.o_orderkey] } + StreamProject { exprs: [nation.n_name, Extract('YEAR':Varchar, orders.o_orderdate) as $expr218, ((lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) - (partsupp.ps_supplycost * lineitem.l_quantity)) as $expr219, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, partsupp.ps_partkey, partsupp.ps_suppkey, orders.o_orderkey] } StreamHashJoin { type: Inner, predicate: lineitem.l_orderkey = orders.o_orderkey, output: [lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, partsupp.ps_supplycost, orders.o_orderdate, nation.n_name, part.p_partkey, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, supplier.s_suppkey, lineitem.l_suppkey, nation.n_nationkey, supplier.s_nationkey, partsupp.ps_partkey, partsupp.ps_suppkey, orders.o_orderkey] } left table: 1, right table 3, left degree table: 2, right degree table: 4, StreamExchange Hash([0]) from 2 @@ -1732,7 +1732,7 @@ Upstream BatchPlanNode - Table 0 { columns: [nation_n_name, $expr221, count, sum($expr222)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [0, 1] } + Table 0 { columns: [nation_n_name, $expr218, count, sum($expr219)], primary key: [$0 ASC, $1 ASC], value indices: [2, 3], distribution key: [0, 1] } Table 1 { columns: [lineitem_l_orderkey, lineitem_l_quantity, lineitem_l_extendedprice, lineitem_l_discount, nation_n_name, partsupp_ps_supplycost, part_p_partkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, partsupp_ps_partkey, partsupp_ps_suppkey], primary key: [$0 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], distribution key: [0] } Table 2 { columns: [lineitem_l_orderkey, part_p_partkey, lineitem_l_linenumber, lineitem_l_partkey, supplier_s_suppkey, lineitem_l_suppkey, nation_n_nationkey, supplier_s_nationkey, partsupp_ps_partkey, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC, $7 ASC, $8 ASC, $9 ASC], value indices: [10], distribution key: [0] } Table 3 { columns: [orders_o_orderkey, orders_o_orderdate], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } @@ -1870,22 +1870,22 @@ Fragment 0 StreamMaterialize { columns: [c_custkey, c_name, revenue, c_acctbal, n_name, c_address, c_phone, c_comment], pk_columns: [c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment], order_descs: [revenue, c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [customer.c_custkey, customer.c_name, sum($expr74), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } - StreamTopN { order: "[sum($expr74) DESC]", limit: 20, offset: 0 } + StreamProject { exprs: [customer.c_custkey, customer.c_name, sum($expr73), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } + StreamTopN { order: "[sum($expr73) DESC]", limit: 20, offset: 0 } state table: 0 StreamExchange Single from 1 Fragment 1 - StreamGroupTopN { order: "[sum($expr74) DESC]", limit: 20, offset: 0, group_key: [8] } + StreamGroupTopN { order: "[sum($expr73) DESC]", limit: 20, offset: 0, group_key: [8] } state table: 1 - StreamProject { exprs: [customer.c_custkey, customer.c_name, sum($expr74), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment, Vnode(customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment) as $expr75] } - StreamProject { exprs: [customer.c_custkey, customer.c_name, sum($expr74), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } - StreamHashAgg { group_key: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment], aggs: [count, sum($expr74)] } + StreamProject { exprs: [customer.c_custkey, customer.c_name, sum($expr73), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment, Vnode(customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment) as $expr74] } + StreamProject { exprs: [customer.c_custkey, customer.c_name, sum($expr73), customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment] } + StreamHashAgg { group_key: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment], aggs: [count, sum($expr73)] } result table: 2, state tables: [] StreamExchange Hash([0, 1, 2, 3, 4, 5, 6]) from 2 Fragment 2 - StreamProject { exprs: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment, (lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)) as $expr74, orders.o_orderkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, lineitem.l_orderkey, lineitem.l_linenumber] } + StreamProject { exprs: [customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment, (lineitem.l_extendedprice * (1.00:Decimal - lineitem.l_discount)) as $expr73, orders.o_orderkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, lineitem.l_orderkey, lineitem.l_linenumber] } StreamHashJoin { type: Inner, predicate: orders.o_orderkey = lineitem.l_orderkey, output: [customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, orders.o_orderkey, orders.o_custkey, nation.n_nationkey, customer.c_nationkey, lineitem.l_orderkey, lineitem.l_linenumber] } left table: 3, right table 5, left degree table: 4, right degree table: 6, StreamExchange Hash([6]) from 3 @@ -1927,9 +1927,9 @@ Upstream BatchPlanNode - Table 0 { columns: [customer_c_custkey, customer_c_name, sum($expr74), customer_c_acctbal, nation_n_name, customer_c_address, customer_c_phone, customer_c_comment, $expr75], primary key: [$2 DESC, $0 ASC, $1 ASC, $3 ASC, $6 ASC, $4 ASC, $5 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [] } - Table 1 { columns: [customer_c_custkey, customer_c_name, sum($expr74), customer_c_acctbal, nation_n_name, customer_c_address, customer_c_phone, customer_c_comment, $expr75], primary key: [$8 ASC, $2 DESC, $0 ASC, $1 ASC, $3 ASC, $6 ASC, $4 ASC, $5 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [0, 1, 3, 6, 4, 5, 7], vnode column idx: 8 } - Table 2 { columns: [customer_c_custkey, customer_c_name, customer_c_acctbal, customer_c_phone, nation_n_name, customer_c_address, customer_c_comment, count, sum($expr74)], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [7, 8], distribution key: [0, 1, 2, 3, 4, 5, 6] } + Table 0 { columns: [customer_c_custkey, customer_c_name, sum($expr73), customer_c_acctbal, nation_n_name, customer_c_address, customer_c_phone, customer_c_comment, $expr74], primary key: [$2 DESC, $0 ASC, $1 ASC, $3 ASC, $6 ASC, $4 ASC, $5 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [] } + Table 1 { columns: [customer_c_custkey, customer_c_name, sum($expr73), customer_c_acctbal, nation_n_name, customer_c_address, customer_c_phone, customer_c_comment, $expr74], primary key: [$8 ASC, $2 DESC, $0 ASC, $1 ASC, $3 ASC, $6 ASC, $4 ASC, $5 ASC, $7 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8], distribution key: [0, 1, 3, 6, 4, 5, 7], vnode column idx: 8 } + Table 2 { columns: [customer_c_custkey, customer_c_name, customer_c_acctbal, customer_c_phone, nation_n_name, customer_c_address, customer_c_comment, count, sum($expr73)], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC, $6 ASC], value indices: [7, 8], distribution key: [0, 1, 2, 3, 4, 5, 6] } Table 3 { columns: [customer_c_custkey, customer_c_name, customer_c_address, customer_c_phone, customer_c_acctbal, customer_c_comment, orders_o_orderkey, nation_n_name, orders_o_custkey, customer_c_nationkey, nation_n_nationkey], primary key: [$6 ASC, $0 ASC, $8 ASC, $10 ASC, $9 ASC], value indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], distribution key: [6] } Table 4 { columns: [orders_o_orderkey, customer_c_custkey, orders_o_custkey, nation_n_nationkey, customer_c_nationkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC, $4 ASC], value indices: [5], distribution key: [0] } Table 5 { columns: [lineitem_l_orderkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_linenumber], primary key: [$0 ASC, $3 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } @@ -2042,7 +2042,7 @@ | └─StreamHashAgg { group_key: [partsupp.ps_partkey], aggs: [count, sum($expr1)] } | └─StreamExchange { dist: HashShard(partsupp.ps_partkey) } | └─StreamProject { exprs: [partsupp.ps_partkey, (partsupp.ps_supplycost * partsupp.ps_availqty) as $expr1, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } - | └─StreamShare { id = 1485 } + | └─StreamShare { id = 1471 } | └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, partsupp.ps_suppkey, supplier.s_suppkey, supplier.s_nationkey, nation.n_nationkey] } | ├─StreamExchange { dist: HashShard(supplier.s_nationkey) } | | └─StreamHashJoin { type: Inner, predicate: partsupp.ps_suppkey = supplier.s_suppkey, output: [partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey, partsupp.ps_suppkey, supplier.s_suppkey] } @@ -2060,7 +2060,7 @@ └─StreamExchange { dist: Single } └─StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr2)] } └─StreamProject { exprs: [(partsupp.ps_supplycost * partsupp.ps_availqty) as $expr2, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } - └─StreamShare { id = 1485 } + └─StreamShare { id = 1471 } └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, partsupp.ps_suppkey, supplier.s_suppkey, supplier.s_nationkey, nation.n_nationkey] } ├─StreamExchange { dist: HashShard(supplier.s_nationkey) } | └─StreamHashJoin { type: Inner, predicate: partsupp.ps_suppkey = supplier.s_suppkey, output: [partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey, partsupp.ps_suppkey, supplier.s_suppkey] } @@ -2076,16 +2076,16 @@ Fragment 0 StreamMaterialize { columns: [ps_partkey, value], pk_columns: [ps_partkey], order_descs: [value, ps_partkey], pk_conflict: "no check" } materialized table: 4294967294 - StreamDynamicFilter { predicate: (sum($expr221) > $expr223), output: [partsupp.ps_partkey, sum($expr221)] } + StreamDynamicFilter { predicate: (sum($expr218) > $expr220), output: [partsupp.ps_partkey, sum($expr218)] } left table: 0, right table 1 - StreamProject { exprs: [partsupp.ps_partkey, sum($expr221)] } - StreamHashAgg { group_key: [partsupp.ps_partkey], aggs: [count, sum($expr221)] } + StreamProject { exprs: [partsupp.ps_partkey, sum($expr218)] } + StreamHashAgg { group_key: [partsupp.ps_partkey], aggs: [count, sum($expr218)] } result table: 2, state tables: [] StreamExchange Hash([0]) from 1 StreamExchange Broadcast from 7 Fragment 1 - StreamProject { exprs: [partsupp.ps_partkey, (partsupp.ps_supplycost * partsupp.ps_availqty) as $expr221, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } + StreamProject { exprs: [partsupp.ps_partkey, (partsupp.ps_supplycost * partsupp.ps_availqty) as $expr218, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } StreamExchange Hash([5]) from 2 Fragment 2 @@ -2118,19 +2118,19 @@ BatchPlanNode Fragment 7 - StreamProject { exprs: [(sum(sum($expr222)) * 0.0001000000:Decimal) as $expr223] } - StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr222))] } + StreamProject { exprs: [(sum(sum($expr219)) * 0.0001000000:Decimal) as $expr220] } + StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr219))] } result table: 11, state tables: [] StreamExchange Single from 8 Fragment 8 - StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr222)] } - StreamProject { exprs: [(partsupp.ps_supplycost * partsupp.ps_availqty) as $expr222, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } + StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr219)] } + StreamProject { exprs: [(partsupp.ps_supplycost * partsupp.ps_availqty) as $expr219, partsupp.ps_partkey, partsupp.ps_suppkey, supplier.s_suppkey, nation.n_nationkey, supplier.s_nationkey] } StreamExchange Hash([5]) from 2 - Table 0 { columns: [partsupp_ps_partkey, sum($expr221)], primary key: [$1 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } - Table 1 { columns: [$expr223], primary key: [], value indices: [0], distribution key: [] } - Table 2 { columns: [partsupp_ps_partkey, count, sum($expr221)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 0 { columns: [partsupp_ps_partkey, sum($expr218)], primary key: [$1 ASC, $0 ASC], value indices: [0, 1], distribution key: [0] } + Table 1 { columns: [$expr220], primary key: [], value indices: [0], distribution key: [] } + Table 2 { columns: [partsupp_ps_partkey, count, sum($expr218)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } Table 3 { columns: [partsupp_ps_partkey, partsupp_ps_availqty, partsupp_ps_supplycost, supplier_s_nationkey, partsupp_ps_suppkey, supplier_s_suppkey], primary key: [$3 ASC, $0 ASC, $4 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [3] } Table 4 { columns: [supplier_s_nationkey, partsupp_ps_partkey, partsupp_ps_suppkey, supplier_s_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } Table 5 { columns: [nation_n_nationkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } @@ -2139,7 +2139,7 @@ Table 8 { columns: [partsupp_ps_suppkey, partsupp_ps_partkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 9 { columns: [supplier_s_suppkey, supplier_s_nationkey], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } Table 10 { columns: [supplier_s_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } - Table 11 { columns: [count, sum(sum($expr222))], primary key: [], value indices: [0, 1], distribution key: [] } + Table 11 { columns: [count, sum(sum($expr219))], primary key: [], value indices: [0, 1], distribution key: [] } Table 4294967294 { columns: [ps_partkey, value], primary key: [$1 DESC, $0 ASC], value indices: [0, 1], distribution key: [0] } - id: tpch_q12 before: @@ -2217,13 +2217,13 @@ Fragment 0 StreamMaterialize { columns: [l_shipmode, high_line_count, low_line_count], pk_columns: [l_shipmode], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [lineitem.l_shipmode, sum($expr143), sum($expr144)] } - StreamHashAgg { group_key: [lineitem.l_shipmode], aggs: [count, sum($expr143), sum($expr144)] } + StreamProject { exprs: [lineitem.l_shipmode, sum($expr141), sum($expr142)] } + StreamHashAgg { group_key: [lineitem.l_shipmode], aggs: [count, sum($expr141), sum($expr142)] } result table: 0, state tables: [] StreamExchange Hash([0]) from 1 Fragment 1 - StreamProject { exprs: [lineitem.l_shipmode, Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr143, Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr144, orders.o_orderkey, lineitem.l_orderkey, lineitem.l_linenumber] } + StreamProject { exprs: [lineitem.l_shipmode, Case(((orders.o_orderpriority = '1-URGENT':Varchar) OR (orders.o_orderpriority = '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr141, Case(((orders.o_orderpriority <> '1-URGENT':Varchar) AND (orders.o_orderpriority <> '2-HIGH':Varchar)), 1:Int32, 0:Int32) as $expr142, orders.o_orderkey, lineitem.l_orderkey, lineitem.l_linenumber] } StreamHashJoin { type: Inner, predicate: orders.o_orderkey = lineitem.l_orderkey, output: [orders.o_orderpriority, lineitem.l_shipmode, orders.o_orderkey, lineitem.l_orderkey, lineitem.l_linenumber] } left table: 1, right table 3, left degree table: 2, right degree table: 4, StreamExchange Hash([0]) from 2 @@ -2241,7 +2241,7 @@ Upstream BatchPlanNode - Table 0 { columns: [lineitem_l_shipmode, count, sum($expr143), sum($expr144)], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [0] } + Table 0 { columns: [lineitem_l_shipmode, count, sum($expr141), sum($expr142)], primary key: [$0 ASC], value indices: [1, 2, 3], distribution key: [0] } Table 1 { columns: [orders_o_orderkey, orders_o_orderpriority], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } Table 2 { columns: [orders_o_orderkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 3 { columns: [lineitem_l_orderkey, lineitem_l_shipmode, lineitem_l_linenumber], primary key: [$0 ASC, $2 ASC], value indices: [0, 1, 2], distribution key: [0] } @@ -2412,14 +2412,14 @@ Fragment 0 StreamMaterialize { columns: [promo_revenue], pk_columns: [], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [((100.00:Decimal * sum(sum($expr215))) / sum(sum($expr216))) as $expr217] } - StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr215)), sum(sum($expr216))] } + StreamProject { exprs: [((100.00:Decimal * sum(sum($expr212))) / sum(sum($expr213))) as $expr214] } + StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr212)), sum(sum($expr213))] } result table: 0, state tables: [] StreamExchange Single from 1 Fragment 1 - StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr215), sum($expr216)] } - StreamProject { exprs: [Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Decimal) as $expr215, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr216, lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey] } + StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr212), sum($expr213)] } + StreamProject { exprs: [Case(Like(part.p_type, 'PROMO%':Varchar), (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)), 0:Decimal) as $expr212, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr213, lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey] } StreamHashJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey, output: [lineitem.l_extendedprice, lineitem.l_discount, part.p_type, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, part.p_partkey] } left table: 1, right table 3, left degree table: 2, right degree table: 4, StreamExchange Hash([0]) from 2 @@ -2437,7 +2437,7 @@ Upstream BatchPlanNode - Table 0 { columns: [count, sum(sum($expr215)), sum(sum($expr216))], primary key: [], value indices: [0, 1, 2], distribution key: [] } + Table 0 { columns: [count, sum(sum($expr212)), sum(sum($expr213))], primary key: [], value indices: [0, 1, 2], distribution key: [] } Table 1 { columns: [lineitem_l_partkey, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_orderkey, lineitem_l_linenumber], primary key: [$0 ASC, $3 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0] } Table 2 { columns: [lineitem_l_partkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } Table 3 { columns: [part_p_partkey, part_p_type], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } @@ -2510,37 +2510,15 @@ └─LogicalAgg { group_key: [lineitem.l_suppkey], aggs: [sum($expr2)] } └─LogicalProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr2] } └─LogicalScan { table: lineitem, output_columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount], required_columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate], predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < ('1993-01-01':Date + '3 mons':Interval)) } - batch_plan: | - BatchExchange { order: [supplier.s_suppkey ASC], dist: Single } - └─BatchSort { order: [supplier.s_suppkey ASC] } - └─BatchHashJoin { type: Inner, predicate: sum($expr1) = max(max(sum($expr2))), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum($expr1)] } - ├─BatchExchange { order: [], dist: HashShard(sum($expr1)) } - | └─BatchHashJoin { type: Inner, predicate: supplier.s_suppkey = lineitem.l_suppkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum($expr1)] } - | ├─BatchExchange { order: [], dist: HashShard(supplier.s_suppkey) } - | | └─BatchScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone], distribution: UpstreamHashShard(supplier.s_suppkey) } - | └─BatchHashAgg { group_key: [lineitem.l_suppkey], aggs: [sum($expr1)] } - | └─BatchExchange { order: [], dist: HashShard(lineitem.l_suppkey) } - | └─BatchProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr1] } - | └─BatchFilter { predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < '1993-04-01 00:00:00':Timestamp) } - | └─BatchScan { table: lineitem, columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate], distribution: SomeShard } - └─BatchExchange { order: [], dist: HashShard(max(max(sum($expr2)))) } - └─BatchSimpleAgg { aggs: [max(max(sum($expr2)))] } - └─BatchExchange { order: [], dist: Single } - └─BatchSimpleAgg { aggs: [max(sum($expr2))] } - └─BatchHashAgg { group_key: [lineitem.l_suppkey], aggs: [sum($expr2)] } - └─BatchExchange { order: [], dist: HashShard(lineitem.l_suppkey) } - └─BatchProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr2] } - └─BatchFilter { predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < '1993-04-01 00:00:00':Timestamp) } - └─BatchScan { table: lineitem, columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate], distribution: SomeShard } stream_plan: | - StreamMaterialize { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, lineitem.l_suppkey(hidden), max(max(sum($expr132)))(hidden)], pk_columns: [s_suppkey, lineitem.l_suppkey, total_revenue, max(max(sum($expr132)))], pk_conflict: "no check" } + StreamMaterialize { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, lineitem.l_suppkey(hidden), max(max(sum($expr80)))(hidden)], pk_columns: [s_suppkey, lineitem.l_suppkey, total_revenue, max(max(sum($expr80)))], pk_conflict: "no check" } └─StreamHashJoin { type: Inner, predicate: sum($expr1) = max(max(sum($expr1))), output: all } ├─StreamExchange { dist: HashShard(sum($expr1)) } | └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = lineitem.l_suppkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum($expr1), lineitem.l_suppkey] } | ├─StreamExchange { dist: HashShard(supplier.s_suppkey) } | | └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } | └─StreamProject { exprs: [lineitem.l_suppkey, sum($expr1)] } - | └─StreamShare { id = 1274 } + | └─StreamShare { id = 820 } | └─StreamProject { exprs: [lineitem.l_suppkey, sum($expr1)] } | └─StreamHashAgg { group_key: [lineitem.l_suppkey], aggs: [count, sum($expr1)] } | └─StreamExchange { dist: HashShard(lineitem.l_suppkey) } @@ -2554,7 +2532,7 @@ └─StreamHashAgg { group_key: [$expr2], aggs: [count, max(sum($expr1))] } └─StreamProject { exprs: [lineitem.l_suppkey, sum($expr1), Vnode(lineitem.l_suppkey) as $expr2] } └─StreamProject { exprs: [lineitem.l_suppkey, sum($expr1)] } - └─StreamShare { id = 1274 } + └─StreamShare { id = 820 } └─StreamProject { exprs: [lineitem.l_suppkey, sum($expr1)] } └─StreamHashAgg { group_key: [lineitem.l_suppkey], aggs: [count, sum($expr1)] } └─StreamExchange { dist: HashShard(lineitem.l_suppkey) } @@ -2563,18 +2541,18 @@ └─StreamTableScan { table: lineitem, columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_shipdate], pk: [lineitem.l_orderkey, lineitem.l_linenumber], dist: UpstreamHashShard(lineitem.l_orderkey, lineitem.l_linenumber) } stream_dist_plan: | Fragment 0 - StreamMaterialize { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, lineitem.l_suppkey(hidden), max(max(sum($expr132)))(hidden)], pk_columns: [s_suppkey, lineitem.l_suppkey, total_revenue, max(max(sum($expr132)))], pk_conflict: "no check" } + StreamMaterialize { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, lineitem.l_suppkey(hidden), max(max(sum($expr80)))(hidden)], pk_columns: [s_suppkey, lineitem.l_suppkey, total_revenue, max(max(sum($expr80)))], pk_conflict: "no check" } materialized table: 4294967294 - StreamHashJoin { type: Inner, predicate: sum($expr132) = max(max(sum($expr132))), output: all } + StreamHashJoin { type: Inner, predicate: sum($expr80) = max(max(sum($expr80))), output: all } left table: 0, right table 2, left degree table: 1, right degree table: 3, StreamExchange Hash([4]) from 1 StreamExchange Hash([0]) from 5 Fragment 1 - StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = lineitem.l_suppkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum($expr132), lineitem.l_suppkey] } + StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = lineitem.l_suppkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, sum($expr80), lineitem.l_suppkey] } left table: 4, right table 6, left degree table: 5, right degree table: 7, StreamExchange Hash([0]) from 2 - StreamProject { exprs: [lineitem.l_suppkey, sum($expr132)] } + StreamProject { exprs: [lineitem.l_suppkey, sum($expr80)] } StreamExchange Hash([0]) from 3 Fragment 2 @@ -2583,45 +2561,45 @@ BatchPlanNode Fragment 3 - StreamProject { exprs: [lineitem.l_suppkey, sum($expr138)] } - StreamHashAgg { group_key: [lineitem.l_suppkey], aggs: [count, sum($expr138)] } + StreamProject { exprs: [lineitem.l_suppkey, sum($expr86)] } + StreamHashAgg { group_key: [lineitem.l_suppkey], aggs: [count, sum($expr86)] } result table: 8, state tables: [] StreamExchange Hash([0]) from 4 Fragment 4 - StreamProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr138, lineitem.l_orderkey, lineitem.l_linenumber] } + StreamProject { exprs: [lineitem.l_suppkey, (lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr86, lineitem.l_orderkey, lineitem.l_linenumber] } StreamFilter { predicate: (lineitem.l_shipdate >= '1993-01-01':Date) AND (lineitem.l_shipdate < '1993-04-01 00:00:00':Timestamp) } Chain { table: lineitem, columns: [lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_shipdate], pk: [lineitem.l_orderkey, lineitem.l_linenumber], dist: UpstreamHashShard(lineitem.l_orderkey, lineitem.l_linenumber) } Upstream BatchPlanNode Fragment 5 - StreamProject { exprs: [max(max(sum($expr132)))] } - StreamGlobalSimpleAgg { aggs: [count, max(max(sum($expr132)))] } + StreamProject { exprs: [max(max(sum($expr80)))] } + StreamGlobalSimpleAgg { aggs: [count, max(max(sum($expr80)))] } result table: 10, state tables: [9] StreamExchange Single from 6 Fragment 6 - StreamHashAgg { group_key: [$expr139], aggs: [count, max(sum($expr132))] } + StreamHashAgg { group_key: [$expr87], aggs: [count, max(sum($expr80))] } result table: 12, state tables: [11] - StreamProject { exprs: [lineitem.l_suppkey, sum($expr132), Vnode(lineitem.l_suppkey) as $expr139] } - StreamProject { exprs: [lineitem.l_suppkey, sum($expr132)] } + StreamProject { exprs: [lineitem.l_suppkey, sum($expr80), Vnode(lineitem.l_suppkey) as $expr87] } + StreamProject { exprs: [lineitem.l_suppkey, sum($expr80)] } StreamExchange Hash([0]) from 3 - Table 0 { columns: [supplier_s_suppkey, supplier_s_name, supplier_s_address, supplier_s_phone, sum($expr132), lineitem_l_suppkey], primary key: [$4 ASC, $0 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [4] } - Table 1 { columns: [sum($expr132), supplier_s_suppkey, lineitem_l_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } - Table 2 { columns: [max(max(sum($expr132)))], primary key: [$0 ASC], value indices: [0], distribution key: [0] } - Table 3 { columns: [max(max(sum($expr132))), _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } + Table 0 { columns: [supplier_s_suppkey, supplier_s_name, supplier_s_address, supplier_s_phone, sum($expr80), lineitem_l_suppkey], primary key: [$4 ASC, $0 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [4] } + Table 1 { columns: [sum($expr80), supplier_s_suppkey, lineitem_l_suppkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } + Table 2 { columns: [max(max(sum($expr80)))], primary key: [$0 ASC], value indices: [0], distribution key: [0] } + Table 3 { columns: [max(max(sum($expr80))), _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 4 { columns: [supplier_s_suppkey, supplier_s_name, supplier_s_address, supplier_s_phone], primary key: [$0 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } Table 5 { columns: [supplier_s_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } - Table 6 { columns: [lineitem_l_suppkey, sum($expr132)], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 6 { columns: [lineitem_l_suppkey, sum($expr80)], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } Table 7 { columns: [lineitem_l_suppkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } - Table 8 { columns: [lineitem_l_suppkey, count, sum($expr138)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } - Table 9 { columns: [max(sum($expr132)), $expr139], primary key: [$0 DESC, $1 ASC], value indices: [0, 1], distribution key: [] } - Table 10 { columns: [count, max(max(sum($expr132)))], primary key: [], value indices: [0, 1], distribution key: [] } - Table 11 { columns: [$expr139, sum($expr132), lineitem_l_suppkey], primary key: [$0 ASC, $1 DESC, $2 ASC], value indices: [1, 2], distribution key: [2], vnode column idx: 0 } - Table 12 { columns: [$expr139, count, max(sum($expr132))], primary key: [$0 ASC], value indices: [1, 2], distribution key: [], vnode column idx: 0 } - Table 4294967294 { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, lineitem.l_suppkey, max(max(sum($expr132)))], primary key: [$0 ASC, $5 ASC, $4 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [4] } + Table 8 { columns: [lineitem_l_suppkey, count, sum($expr86)], primary key: [$0 ASC], value indices: [1, 2], distribution key: [0] } + Table 9 { columns: [max(sum($expr80)), $expr87], primary key: [$0 DESC, $1 ASC], value indices: [0, 1], distribution key: [] } + Table 10 { columns: [count, max(max(sum($expr80)))], primary key: [], value indices: [0, 1], distribution key: [] } + Table 11 { columns: [$expr87, sum($expr80), lineitem_l_suppkey], primary key: [$0 ASC, $1 DESC, $2 ASC], value indices: [1, 2], distribution key: [2], vnode column idx: 0 } + Table 12 { columns: [$expr87, count, max(sum($expr80))], primary key: [$0 ASC], value indices: [1, 2], distribution key: [], vnode column idx: 0 } + Table 4294967294 { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, lineitem.l_suppkey, max(max(sum($expr80)))], primary key: [$0 ASC, $5 ASC, $4 ASC, $6 ASC], value indices: [0, 1, 2, 3, 4, 5, 6], distribution key: [4] } - id: tpch_q16 before: - create_tables @@ -2698,7 +2676,7 @@ | ├─StreamExchange { dist: HashShard(partsupp.ps_partkey) } | | └─StreamTableScan { table: partsupp, columns: [partsupp.ps_partkey, partsupp.ps_suppkey], pk: [partsupp.ps_partkey, partsupp.ps_suppkey], dist: UpstreamHashShard(partsupp.ps_partkey, partsupp.ps_suppkey) } | └─StreamExchange { dist: HashShard(part.p_partkey) } - | └─StreamFilter { predicate: (part.p_brand <> 'Brand#45':Varchar) AND (Not((part.p_type >= 'SMALL PLATED':Varchar)) OR Not((part.p_type < 'SMALL PLATEE':Varchar))) AND In(part.p_size, 19:Int32, 17:Int32, 16:Int32, 23:Int32, 10:Int32, 4:Int32, 38:Int32, 11:Int32) } + | └─StreamFilter { predicate: (part.p_brand <> 'Brand#45':Varchar) AND Not(Like(part.p_type, 'SMALL PLATED%':Varchar)) AND In(part.p_size, 19:Int32, 17:Int32, 16:Int32, 23:Int32, 10:Int32, 4:Int32, 38:Int32, 11:Int32) } | └─StreamTableScan { table: part, columns: [part.p_partkey, part.p_brand, part.p_type, part.p_size], pk: [part.p_partkey], dist: UpstreamHashShard(part.p_partkey) } └─StreamExchange { dist: HashShard(supplier.s_suppkey) } └─StreamProject { exprs: [supplier.s_suppkey] } @@ -2731,7 +2709,7 @@ BatchPlanNode Fragment 4 - StreamFilter { predicate: (part.p_brand <> 'Brand#45':Varchar) AND (Not((part.p_type >= 'SMALL PLATED':Varchar)) OR Not((part.p_type < 'SMALL PLATEE':Varchar))) AND In(part.p_size, 19:Int32, 17:Int32, 16:Int32, 23:Int32, 10:Int32, 4:Int32, 38:Int32, 11:Int32) } + StreamFilter { predicate: (part.p_brand <> 'Brand#45':Varchar) AND Not(Like(part.p_type, 'SMALL PLATED%':Varchar)) AND In(part.p_size, 19:Int32, 17:Int32, 16:Int32, 23:Int32, 10:Int32, 4:Int32, 38:Int32, 11:Int32) } Chain { table: part, columns: [part.p_partkey, part.p_brand, part.p_type, part.p_size], pk: [part.p_partkey], dist: UpstreamHashShard(part.p_partkey) } Upstream BatchPlanNode @@ -2855,7 +2833,7 @@ Fragment 0 StreamMaterialize { columns: [avg_yearly], pk_columns: [], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [RoundDigit((sum(sum(lineitem.l_extendedprice)) / 7.0:Decimal), 16:Int32) as $expr155] } + StreamProject { exprs: [RoundDigit((sum(sum(lineitem.l_extendedprice)) / 7.0:Decimal), 16:Int32) as $expr153] } StreamGlobalSimpleAgg { aggs: [count, sum(sum(lineitem.l_extendedprice))] } result table: 0, state tables: [] StreamExchange Single from 1 @@ -2863,11 +2841,11 @@ Fragment 1 StreamStatelessLocalSimpleAgg { aggs: [count, sum(lineitem.l_extendedprice)] } StreamProject { exprs: [lineitem.l_extendedprice, lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey, part.p_partkey] } - StreamFilter { predicate: (lineitem.l_quantity < $expr154) } + StreamFilter { predicate: (lineitem.l_quantity < $expr152) } StreamHashJoin { type: Inner, predicate: part.p_partkey IS NOT DISTINCT FROM part.p_partkey, output: all } left table: 1, right table 3, left degree table: 2, right degree table: 4, StreamExchange Hash([2]) from 2 - StreamProject { exprs: [part.p_partkey, (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity))) as $expr154] } + StreamProject { exprs: [part.p_partkey, (0.2:Decimal * (sum(lineitem.l_quantity) / count(lineitem.l_quantity))) as $expr152] } StreamHashAgg { group_key: [part.p_partkey], aggs: [count, sum(lineitem.l_quantity), count(lineitem.l_quantity)] } result table: 9, state tables: [] StreamHashJoin { type: LeftOuter, predicate: part.p_partkey IS NOT DISTINCT FROM lineitem.l_partkey, output: [part.p_partkey, lineitem.l_quantity, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey] } @@ -2912,7 +2890,7 @@ Table 0 { columns: [count, sum(sum(lineitem_l_extendedprice))], primary key: [], value indices: [0, 1], distribution key: [] } Table 1 { columns: [lineitem_l_quantity, lineitem_l_extendedprice, part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey], primary key: [$2 ASC, $3 ASC, $4 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [2] } Table 2 { columns: [part_p_partkey, lineitem_l_orderkey, lineitem_l_linenumber, lineitem_l_partkey, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC, $3 ASC], value indices: [4], distribution key: [0] } - Table 3 { columns: [part_p_partkey, $expr154], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } + Table 3 { columns: [part_p_partkey, $expr152], primary key: [$0 ASC], value indices: [0, 1], distribution key: [0] } Table 4 { columns: [part_p_partkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } Table 5 { columns: [lineitem_l_partkey, lineitem_l_quantity, lineitem_l_extendedprice, lineitem_l_orderkey, lineitem_l_linenumber], primary key: [$0 ASC, $3 ASC, $4 ASC], value indices: [0, 1, 2, 3, 4], distribution key: [0] } Table 6 { columns: [lineitem_l_partkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } @@ -3193,14 +3171,14 @@ Fragment 0 StreamMaterialize { columns: [revenue], pk_columns: [], pk_conflict: "no check" } materialized table: 4294967294 - StreamProject { exprs: [sum(sum($expr73))] } - StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr73))] } + StreamProject { exprs: [sum(sum($expr72))] } + StreamGlobalSimpleAgg { aggs: [count, sum(sum($expr72))] } result table: 0, state tables: [] StreamExchange Single from 1 Fragment 1 - StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr73)] } - StreamProject { exprs: [(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr73, lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey] } + StreamStatelessLocalSimpleAgg { aggs: [count, sum($expr72)] } + StreamProject { exprs: [(lineitem.l_extendedprice * (1:Int32 - lineitem.l_discount)) as $expr72, lineitem.l_orderkey, lineitem.l_linenumber, part.p_partkey, lineitem.l_partkey] } StreamFilter { predicate: (((((((part.p_brand = 'Brand#52':Varchar) AND In(part.p_container, 'SM CASE':Varchar, 'SM BOX':Varchar, 'SM PACK':Varchar, 'SM PKG':Varchar)) AND (lineitem.l_quantity >= 1:Int32)) AND (lineitem.l_quantity <= 11:Int32)) AND (part.p_size <= 5:Int32)) OR (((((part.p_brand = 'Brand#24':Varchar) AND In(part.p_container, 'MED BAG':Varchar, 'MED BOX':Varchar, 'MED PKG':Varchar, 'MED PACK':Varchar)) AND (lineitem.l_quantity >= 30:Int32)) AND (lineitem.l_quantity <= 40:Int32)) AND (part.p_size <= 10:Int32))) OR (((((part.p_brand = 'Brand#32':Varchar) AND In(part.p_container, 'LG CASE':Varchar, 'LG BOX':Varchar, 'LG PACK':Varchar, 'LG PKG':Varchar)) AND (lineitem.l_quantity >= 10:Int32)) AND (lineitem.l_quantity <= 20:Int32)) AND (part.p_size <= 15:Int32))) } StreamHashJoin { type: Inner, predicate: lineitem.l_partkey = part.p_partkey, output: all } left table: 1, right table 3, left degree table: 2, right degree table: 4, @@ -3220,7 +3198,7 @@ Upstream BatchPlanNode - Table 0 { columns: [count, sum(sum($expr73))], primary key: [], value indices: [0, 1], distribution key: [] } + Table 0 { columns: [count, sum(sum($expr72))], primary key: [], value indices: [0, 1], distribution key: [] } Table 1 { columns: [lineitem_l_partkey, lineitem_l_quantity, lineitem_l_extendedprice, lineitem_l_discount, lineitem_l_orderkey, lineitem_l_linenumber], primary key: [$0 ASC, $4 ASC, $5 ASC], value indices: [0, 1, 2, 3, 4, 5], distribution key: [0] } Table 2 { columns: [lineitem_l_partkey, lineitem_l_orderkey, lineitem_l_linenumber, _degree], primary key: [$0 ASC, $1 ASC, $2 ASC], value indices: [3], distribution key: [0] } Table 3 { columns: [part_p_partkey, part_p_brand, part_p_size, part_p_container], primary key: [$0 ASC], value indices: [0, 1, 2, 3], distribution key: [0] } @@ -3350,7 +3328,7 @@ | | └─StreamTableScan { table: partsupp, columns: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty], pk: [partsupp.ps_partkey, partsupp.ps_suppkey], dist: UpstreamHashShard(partsupp.ps_partkey, partsupp.ps_suppkey) } | └─StreamExchange { dist: HashShard(part.p_partkey) } | └─StreamProject { exprs: [part.p_partkey] } - | └─StreamFilter { predicate: (part.p_name >= 'forest':Varchar) AND (part.p_name < 'foresu':Varchar) } + | └─StreamFilter { predicate: Like(part.p_name, 'forest%':Varchar) } | └─StreamTableScan { table: part, columns: [part.p_partkey, part.p_name], pk: [part.p_partkey], dist: UpstreamHashShard(part.p_partkey) } └─StreamProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, (0.5:Decimal * sum(lineitem.l_quantity)) as $expr2] } └─StreamHashAgg { group_key: [partsupp.ps_partkey, partsupp.ps_suppkey], aggs: [count, sum(lineitem.l_quantity)] } @@ -3392,11 +3370,11 @@ Fragment 4 StreamProject { exprs: [partsupp.ps_suppkey, partsupp.ps_partkey, partsupp.ps_partkey, partsupp.ps_suppkey] } - StreamFilter { predicate: ($expr116 > $expr117) } + StreamFilter { predicate: ($expr115 > $expr116) } StreamHashJoin { type: Inner, predicate: partsupp.ps_partkey IS NOT DISTINCT FROM partsupp.ps_partkey AND partsupp.ps_suppkey IS NOT DISTINCT FROM partsupp.ps_suppkey, output: all } left table: 8, right table 10, left degree table: 9, right degree table: 11, StreamExchange Hash([0, 1]) from 5 - StreamProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, (0.5:Decimal * sum(lineitem.l_quantity)) as $expr117] } + StreamProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, (0.5:Decimal * sum(lineitem.l_quantity)) as $expr116] } StreamHashAgg { group_key: [partsupp.ps_partkey, partsupp.ps_suppkey], aggs: [count, sum(lineitem.l_quantity)] } result table: 16, state tables: [] StreamHashJoin { type: LeftOuter, predicate: partsupp.ps_partkey IS NOT DISTINCT FROM lineitem.l_partkey AND partsupp.ps_suppkey IS NOT DISTINCT FROM lineitem.l_suppkey, output: [partsupp.ps_partkey, partsupp.ps_suppkey, lineitem.l_quantity, lineitem.l_orderkey, lineitem.l_linenumber, lineitem.l_partkey, lineitem.l_suppkey] } @@ -3405,7 +3383,7 @@ StreamExchange Hash([0, 1]) from 9 Fragment 5 - StreamProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty::Decimal as $expr116] } + StreamProject { exprs: [partsupp.ps_partkey, partsupp.ps_suppkey, partsupp.ps_availqty::Decimal as $expr115] } StreamHashJoin { type: LeftSemi, predicate: partsupp.ps_partkey = part.p_partkey, output: all } left table: 12, right table 14, left degree table: 13, right degree table: 15, StreamExchange Hash([0]) from 6 @@ -3418,7 +3396,7 @@ Fragment 7 StreamProject { exprs: [part.p_partkey] } - StreamFilter { predicate: (part.p_name >= 'forest':Varchar) AND (part.p_name < 'foresu':Varchar) } + StreamFilter { predicate: Like(part.p_name, 'forest%':Varchar) } Chain { table: part, columns: [part.p_partkey, part.p_name], pk: [part.p_partkey], dist: UpstreamHashShard(part.p_partkey) } Upstream BatchPlanNode @@ -3446,9 +3424,9 @@ Table 5 { columns: [supplier_s_nationkey, supplier_s_suppkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } Table 6 { columns: [nation_n_nationkey], primary key: [$0 ASC], value indices: [0], distribution key: [0] } Table 7 { columns: [nation_n_nationkey, _degree], primary key: [$0 ASC], value indices: [1], distribution key: [0] } - Table 8 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, $expr116], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0, 1] } + Table 8 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, $expr115], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0, 1] } Table 9 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0, 1] } - Table 10 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, $expr117], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0, 1] } + Table 10 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, $expr116], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0, 1] } Table 11 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0, 1] } Table 12 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, partsupp_ps_availqty], primary key: [$0 ASC, $1 ASC], value indices: [0, 1, 2], distribution key: [0] } Table 13 { columns: [partsupp_ps_partkey, partsupp_ps_suppkey, _degree], primary key: [$0 ASC, $1 ASC], value indices: [2], distribution key: [0] } diff --git a/src/frontend/src/optimizer/heuristic_optimizer.rs b/src/frontend/src/optimizer/heuristic_optimizer.rs index 0ddbe0f96abe..a931a41f365f 100644 --- a/src/frontend/src/optimizer/heuristic_optimizer.rs +++ b/src/frontend/src/optimizer/heuristic_optimizer.rs @@ -33,12 +33,12 @@ pub enum ApplyOrder { /// apply each rule on them. pub struct HeuristicOptimizer<'a> { apply_order: &'a ApplyOrder, - rules: &'a Vec, + rules: &'a [BoxedRule], stats: Stats, } impl<'a> HeuristicOptimizer<'a> { - pub fn new(apply_order: &'a ApplyOrder, rules: &'a Vec) -> Self { + pub fn new(apply_order: &'a ApplyOrder, rules: &'a [BoxedRule]) -> Self { Self { apply_order, rules, diff --git a/src/frontend/src/optimizer/logical_optimization.rs b/src/frontend/src/optimizer/logical_optimization.rs new file mode 100644 index 000000000000..5c8da4d0f416 --- /dev/null +++ b/src/frontend/src/optimizer/logical_optimization.rs @@ -0,0 +1,536 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use itertools::Itertools; +use lazy_static::lazy_static; +use risingwave_common::error::{ErrorCode, Result}; + +use crate::optimizer::heuristic_optimizer::{ApplyOrder, HeuristicOptimizer}; +use crate::optimizer::plan_node::{ColumnPruningContext, PredicatePushdownContext}; +use crate::optimizer::plan_rewriter::ShareSourceRewriter; +#[cfg(debug_assertions)] +use crate::optimizer::plan_visitor::InputRefValidator; +use crate::optimizer::plan_visitor::{ + has_logical_apply, has_logical_over_agg, HasMaxOneRowApply, PlanVisitor, +}; +use crate::optimizer::rule::*; +use crate::optimizer::PlanRef; +use crate::utils::Condition; +use crate::Explain; + +impl PlanRef { + pub(crate) fn optimize_by_rules(self, stage: &OptimizationStage) -> PlanRef { + let OptimizationStage { + stage_name, + rules, + apply_order, + } = stage; + + let mut heuristic_optimizer = HeuristicOptimizer::new(apply_order, rules); + let plan = heuristic_optimizer.optimize(self); + let stats = heuristic_optimizer.get_stats(); + + let ctx = plan.ctx(); + let explain_trace = ctx.is_explain_trace(); + if explain_trace && stats.has_applied_rule() { + ctx.trace(format!("{}:", stage_name)); + ctx.trace(format!("{}", stats)); + ctx.trace(plan.explain_to_string().unwrap()); + } + + plan + } + + pub(crate) fn optimize_by_rules_until_fix_point(self, stage: &OptimizationStage) -> PlanRef { + let OptimizationStage { + stage_name, + rules, + apply_order, + } = stage; + + let mut output_plan = self; + loop { + let mut heuristic_optimizer = HeuristicOptimizer::new(apply_order, rules); + output_plan = heuristic_optimizer.optimize(output_plan); + let stats = heuristic_optimizer.get_stats(); + + let ctx = output_plan.ctx(); + let explain_trace = ctx.is_explain_trace(); + if explain_trace && stats.has_applied_rule() { + ctx.trace(format!("{}:", stage_name)); + ctx.trace(format!("{}", stats)); + ctx.trace(output_plan.explain_to_string().unwrap()); + } + + if !stats.has_applied_rule() { + return output_plan; + } + } + } +} + +pub struct OptimizationStage { + stage_name: String, + rules: Vec, + apply_order: ApplyOrder, +} + +impl OptimizationStage { + pub fn new(name: S, rules: Vec, apply_order: ApplyOrder) -> Self + where + S: Into, + { + OptimizationStage { + stage_name: name.into(), + rules, + apply_order, + } + } +} + +pub struct LogicalOptimizer {} + +lazy_static! { + static ref DAG_TO_TREE: OptimizationStage = OptimizationStage::new( + "DAG To Tree", + vec![DagToTreeRule::create()], + ApplyOrder::TopDown, + ); + + static ref SIMPLE_UNNESTING: OptimizationStage = OptimizationStage::new( + "Simple Unnesting", + vec![ + // Eliminate max one row + MaxOneRowEliminateRule::create(), + // Convert apply to join. + ApplyToJoinRule::create(), + // Pull correlated predicates up the algebra tree to unnest simple subquery. + PullUpCorrelatedPredicateRule::create(), + ], + ApplyOrder::TopDown, + ); + + static ref UNION_MERGE: OptimizationStage = OptimizationStage::new( + "Union Merge", + vec![UnionMergeRule::create()], + ApplyOrder::BottomUp, + ); + + + static ref GENERAL_UNNESTING_TRANS_APPLY_WITH_SHARE: OptimizationStage = OptimizationStage::new( + "General Unnesting(Translate Apply)", + vec![TranslateApplyRule::create(true)], + ApplyOrder::BottomUp, + ); + + static ref GENERAL_UNNESTING_TRANS_APPLY_WITHOUT_SHARE: OptimizationStage = OptimizationStage::new( + "General Unnesting(Translate Apply)", + vec![TranslateApplyRule::create(false)], + ApplyOrder::BottomUp, + ); + + static ref GENERAL_UNNESTING_PUSH_DOWN_APPLY: OptimizationStage = OptimizationStage::new( + "General Unnesting(Push Down Apply)", + vec![ + ApplyAggTransposeRule::create(), + ApplyFilterTransposeRule::create(), + ApplyProjectTransposeRule::create(), + ApplyJoinTransposeRule::create(), + ApplyShareEliminateRule::create(), + ApplyScanRule::create(), + ], + ApplyOrder::TopDown, + ); + + static ref TO_MULTI_JOIN: OptimizationStage = OptimizationStage::new( + "To MultiJoin", + vec![MergeMultiJoinRule::create()], + ApplyOrder::TopDown, + ); + + static ref JOIN_REORDER: OptimizationStage = OptimizationStage::new( + "Join Reorder".to_string(), + vec![ReorderMultiJoinRule::create()], + ApplyOrder::TopDown, + ); + + static ref FILTER_WITH_NOW_TO_JOIN: OptimizationStage = OptimizationStage::new( + "Push down filter with now into a left semijoin", + vec![FilterWithNowToJoinRule::create()], + ApplyOrder::TopDown, + ); + + static ref PUSH_CALC_OF_JOIN: OptimizationStage = OptimizationStage::new( + "Push Down the Calculation of Inputs of Join's Condition", + vec![PushCalculationOfJoinRule::create()], + ApplyOrder::TopDown, + ); + + static ref CONVERT_DISTINCT_AGG_FOR_STREAM: OptimizationStage = OptimizationStage::new( + "Convert Distinct Aggregation", + vec![UnionToDistinctRule::create(), DistinctAggRule::create(true)], + ApplyOrder::TopDown, + ); + + static ref CONVERT_DISTINCT_AGG_FOR_BATCH: OptimizationStage = OptimizationStage::new( + "Convert Distinct Aggregation", + vec![UnionToDistinctRule::create(), DistinctAggRule::create(false)], + ApplyOrder::TopDown, + ); + + static ref JOIN_COMMUTE: OptimizationStage = OptimizationStage::new( + "Join Commute".to_string(), + vec![JoinCommuteRule::create()], + ApplyOrder::TopDown, + ); + + static ref PROJECT_REMOVE: OptimizationStage = OptimizationStage::new( + "Project Remove", + vec![ + // merge should be applied before eliminate + ProjectMergeRule::create(), + ProjectEliminateRule::create(), + TrivialProjectToValuesRule::create(), + UnionInputValuesMergeRule::create(), + // project-join merge should be applied after merge + // eliminate and to values + ProjectJoinMergeRule::create(), + AggProjectMergeRule::create(), + ], + ApplyOrder::BottomUp, + ); + + static ref CONVERT_WINDOW_AGG: OptimizationStage = OptimizationStage::new( + "Convert Window Aggregation", + vec![ + OverAggToTopNRule::create(), + ProjectMergeRule::create(), + ProjectEliminateRule::create(), + TrivialProjectToValuesRule::create(), + UnionInputValuesMergeRule::create(), + ], + ApplyOrder::TopDown, + ); + + + static ref DEDUP_GROUP_KEYS: OptimizationStage = OptimizationStage::new( + "Dedup Group keys", + vec![AggDedupGroupKeyRule::create()], + ApplyOrder::TopDown, + ); + + static ref REWRITE_LIKE_EXPR: OptimizationStage = OptimizationStage::new( + "Rewrite Like Expr", + vec![RewriteLikeExprRule::create()], + ApplyOrder::TopDown, + ); + + static ref AGG_ON_INDEX: OptimizationStage = OptimizationStage::new( + "Agg on Index", + vec![TopNOnIndexRule::create()], + ApplyOrder::TopDown, + ); +} + +impl LogicalOptimizer { + pub fn gen_optimized_logical_plan_for_stream(mut plan: PlanRef) -> Result { + let ctx = plan.ctx(); + let explain_trace = ctx.is_explain_trace(); + + if explain_trace { + ctx.trace("Begin:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + + // If share plan is disable, we need to remove all the share operator generated by the + // binder, e.g. CTE and View. However, we still need to share source to ensure self + // source join can return correct result. + let enable_share_plan = ctx.session_ctx().config().get_enable_share_plan(); + if enable_share_plan { + // Common sub-plan detection. + plan = plan.merge_eq_nodes(); + plan = plan.prune_share(); + if explain_trace { + ctx.trace("Merging equivalent nodes:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + } else { + plan = plan.optimize_by_rules(&DAG_TO_TREE); + + // Replace source to share source. + // Perform share source at the beginning so that we can benefit from predicate pushdown + // and column pruning for the share operator. + plan = ShareSourceRewriter::share_source(plan); + if explain_trace { + ctx.trace("Share Source:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + } + + // Simple Unnesting. + plan = plan.optimize_by_rules(&SIMPLE_UNNESTING); + if HasMaxOneRowApply().visit(plan.clone()) { + return Err(ErrorCode::InternalError( + "Scalar subquery might produce more than one row.".into(), + ) + .into()); + } + + plan = plan.optimize_by_rules(&UNION_MERGE); + + // Predicate push down before translate apply, because we need to calculate the domain + // and predicate push down can reduce the size of domain. + plan = plan.predicate_pushdown( + Condition::true_cond(), + &mut PredicatePushdownContext::new(plan.clone()), + ); + if explain_trace { + ctx.trace("Predicate Push Down:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + + // General Unnesting. + // Translate Apply, push Apply down the plan and finally replace Apply with regular inner + // join. + plan = if enable_share_plan { + plan.optimize_by_rules(&GENERAL_UNNESTING_TRANS_APPLY_WITH_SHARE) + } else { + plan.optimize_by_rules(&GENERAL_UNNESTING_TRANS_APPLY_WITHOUT_SHARE) + }; + + plan = plan.optimize_by_rules_until_fix_point(&GENERAL_UNNESTING_PUSH_DOWN_APPLY); + if has_logical_apply(plan.clone()) { + return Err(ErrorCode::InternalError("Subquery can not be unnested.".into()).into()); + } + + // Predicate Push-down + plan = plan.predicate_pushdown( + Condition::true_cond(), + &mut PredicatePushdownContext::new(plan.clone()), + ); + if explain_trace { + ctx.trace("Predicate Push Down:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + + // Merge inner joins and intermediate filters into multijoin + // This rule assumes that filters have already been pushed down near to + // their relevant joins. + plan = plan.optimize_by_rules(&TO_MULTI_JOIN); + + // Reorder multijoin into left-deep join tree. + plan = plan.optimize_by_rules(&JOIN_REORDER); + + // Predicate Push-down: apply filter pushdown rules again since we pullup all join + // conditions into a filter above the multijoin. + plan = plan.predicate_pushdown( + Condition::true_cond(), + &mut PredicatePushdownContext::new(plan.clone()), + ); + if explain_trace { + ctx.trace("Predicate Push Down:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + + // For stream, push down predicates with now into a left-semi join + plan = plan.optimize_by_rules(&FILTER_WITH_NOW_TO_JOIN); + + // Push down the calculation of inputs of join's condition. + plan = plan.optimize_by_rules(&PUSH_CALC_OF_JOIN); + + // Prune Columns + let required_cols = (0..plan.schema().len()).collect_vec(); + let mut column_pruning_ctx = ColumnPruningContext::new(plan.clone()); + plan = plan.prune_col(&required_cols, &mut column_pruning_ctx); + // Column pruning may introduce additional projects, and filter can be pushed again. + if explain_trace { + ctx.trace("Prune Columns:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + + if column_pruning_ctx.need_second_round() { + // Second round of column pruning and reuse the column pruning context. + // Try to replace original share operator with the new one. + plan = plan.prune_col(&required_cols, &mut column_pruning_ctx); + if explain_trace { + ctx.trace("Prune Columns (For DAG):"); + ctx.trace(plan.explain_to_string().unwrap()); + } + } + + plan = plan.predicate_pushdown( + Condition::true_cond(), + &mut PredicatePushdownContext::new(plan.clone()), + ); + if explain_trace { + ctx.trace("Predicate Push Down:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + + // Convert distinct aggregates. + plan = plan.optimize_by_rules(&CONVERT_DISTINCT_AGG_FOR_STREAM); + + plan = plan.optimize_by_rules(&JOIN_COMMUTE); + + plan = plan.optimize_by_rules(&PROJECT_REMOVE); + + plan = plan.optimize_by_rules(&CONVERT_WINDOW_AGG); + + if has_logical_over_agg(plan.clone()) { + return Err(ErrorCode::InternalError(format!( + "OverAgg can not be transformed. Plan:\n{}", + plan.explain_to_string().unwrap() + )) + .into()); + } + + plan = plan.optimize_by_rules(&DEDUP_GROUP_KEYS); + + #[cfg(debug_assertions)] + InputRefValidator.validate(plan.clone()); + + if ctx.is_explain_logical() { + ctx.store_logical(plan.explain_to_string().unwrap()); + } + + Ok(plan) + } + + pub fn gen_optimized_logical_plan_for_batch(mut plan: PlanRef) -> Result { + let ctx = plan.ctx(); + let explain_trace = ctx.is_explain_trace(); + + if explain_trace { + ctx.trace("Begin:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + + // Convert the dag back to the tree, because we don't support DAG plan for batch. + plan = plan.optimize_by_rules(&DAG_TO_TREE); + + plan = plan.optimize_by_rules(&REWRITE_LIKE_EXPR); + + // Simple Unnesting. + plan = plan.optimize_by_rules(&SIMPLE_UNNESTING); + if HasMaxOneRowApply().visit(plan.clone()) { + return Err(ErrorCode::InternalError( + "Scalar subquery might produce more than one row.".into(), + ) + .into()); + } + + plan = plan.optimize_by_rules(&UNION_MERGE); + + // Predicate push down before translate apply, because we need to calculate the domain + // and predicate push down can reduce the size of domain. + plan = plan.predicate_pushdown( + Condition::true_cond(), + &mut PredicatePushdownContext::new(plan.clone()), + ); + if explain_trace { + ctx.trace("Predicate Push Down:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + + // General Unnesting. + // Translate Apply, push Apply down the plan and finally replace Apply with regular inner + // join. + plan = plan.optimize_by_rules(&GENERAL_UNNESTING_TRANS_APPLY_WITHOUT_SHARE); + plan = plan.optimize_by_rules_until_fix_point(&GENERAL_UNNESTING_PUSH_DOWN_APPLY); + if has_logical_apply(plan.clone()) { + return Err(ErrorCode::InternalError("Subquery can not be unnested.".into()).into()); + } + + // Predicate Push-down + plan = plan.predicate_pushdown( + Condition::true_cond(), + &mut PredicatePushdownContext::new(plan.clone()), + ); + if explain_trace { + ctx.trace("Predicate Push Down:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + + // Merge inner joins and intermediate filters into multijoin + // This rule assumes that filters have already been pushed down near to + // their relevant joins. + plan = plan.optimize_by_rules(&TO_MULTI_JOIN); + + // Reorder multijoin into left-deep join tree. + plan = plan.optimize_by_rules(&JOIN_REORDER); + + // Predicate Push-down: apply filter pushdown rules again since we pullup all join + // conditions into a filter above the multijoin. + plan = plan.predicate_pushdown( + Condition::true_cond(), + &mut PredicatePushdownContext::new(plan.clone()), + ); + if explain_trace { + ctx.trace("Predicate Push Down:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + + // Push down the calculation of inputs of join's condition. + plan = plan.optimize_by_rules(&PUSH_CALC_OF_JOIN); + + // Prune Columns + let required_cols = (0..plan.schema().len()).collect_vec(); + let mut column_pruning_ctx = ColumnPruningContext::new(plan.clone()); + plan = plan.prune_col(&required_cols, &mut column_pruning_ctx); + // Column pruning may introduce additional projects, and filter can be pushed again. + if explain_trace { + ctx.trace("Prune Columns:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + + plan = plan.predicate_pushdown( + Condition::true_cond(), + &mut PredicatePushdownContext::new(plan.clone()), + ); + if explain_trace { + ctx.trace("Predicate Push Down:"); + ctx.trace(plan.explain_to_string().unwrap()); + } + + // Convert distinct aggregates. + plan = plan.optimize_by_rules(&CONVERT_DISTINCT_AGG_FOR_BATCH); + + plan = plan.optimize_by_rules(&JOIN_COMMUTE); + + plan = plan.optimize_by_rules(&PROJECT_REMOVE); + + plan = plan.optimize_by_rules(&CONVERT_WINDOW_AGG); + + if has_logical_over_agg(plan.clone()) { + return Err(ErrorCode::InternalError(format!( + "OverAgg can not be transformed. Plan:\n{}", + plan.explain_to_string().unwrap() + )) + .into()); + } + + plan = plan.optimize_by_rules(&DEDUP_GROUP_KEYS); + + plan = plan.optimize_by_rules(&AGG_ON_INDEX); + + #[cfg(debug_assertions)] + InputRefValidator.validate(plan.clone()); + + if ctx.is_explain_logical() { + ctx.store_logical(plan.explain_to_string().unwrap()); + } + + Ok(plan) + } +} diff --git a/src/frontend/src/optimizer/mod.rs b/src/frontend/src/optimizer/mod.rs index 3775ad59ae8e..a5b07ee2139f 100644 --- a/src/frontend/src/optimizer/mod.rs +++ b/src/frontend/src/optimizer/mod.rs @@ -23,20 +23,21 @@ mod plan_rewriter; pub use plan_rewriter::PlanRewriter; mod plan_visitor; pub use plan_visitor::PlanVisitor; +mod logical_optimization; mod optimizer_context; mod plan_expr_rewriter; mod rule; use fixedbitset::FixedBitSet; use itertools::Itertools as _; +pub use logical_optimization::*; pub use optimizer_context::*; use plan_expr_rewriter::ConstEvalRewriter; -use plan_rewriter::ShareSourceRewriter; use property::Order; use risingwave_common::catalog::{ColumnCatalog, Field, Schema}; use risingwave_common::error::{ErrorCode, Result}; use risingwave_common::util::iter_util::ZipEqDebug; -use self::heuristic_optimizer::{ApplyOrder, HeuristicOptimizer}; +use self::heuristic_optimizer::ApplyOrder; use self::plan_node::{ BatchProject, Convention, LogicalProject, StreamDml, StreamMaterialize, StreamProject, StreamRowIdGen, StreamSink, @@ -44,19 +45,17 @@ use self::plan_node::{ #[cfg(debug_assertions)] use self::plan_visitor::InputRefValidator; use self::plan_visitor::{ - has_batch_delete, has_batch_exchange, has_batch_insert, has_batch_update, has_logical_apply, - has_logical_over_agg, HasMaxOneRowApply, + has_batch_delete, has_batch_exchange, has_batch_insert, has_batch_update, }; use self::property::RequiredDist; use self::rule::*; use crate::catalog::table_catalog::{TableType, TableVersion}; use crate::expr::InputRef; use crate::optimizer::plan_node::{ - BatchExchange, ColumnPruningContext, PlanNodeType, PlanTreeNode, PredicatePushdownContext, - RewriteExprsRecursive, + BatchExchange, PlanNodeType, PlanTreeNode, RewriteExprsRecursive, }; use crate::optimizer::property::Distribution; -use crate::utils::{ColIndexMappingRewriteExt, Condition}; +use crate::utils::ColIndexMappingRewriteExt; use crate::WithOptions; /// `PlanRoot` is used to describe a plan. planner will construct a `PlanRoot` with `LogicalNode`. @@ -146,356 +145,20 @@ impl PlanRoot { LogicalProject::with_out_fields(self.plan, &self.out_fields).into() } - fn optimize_by_rules( - &self, - plan: PlanRef, - stage_name: String, - rules: Vec, - apply_order: ApplyOrder, - ) -> PlanRef { - let mut heuristic_optimizer = HeuristicOptimizer::new(&apply_order, &rules); - let plan = heuristic_optimizer.optimize(plan); - let stats = heuristic_optimizer.get_stats(); - - let ctx = plan.ctx(); - let explain_trace = ctx.is_explain_trace(); - if explain_trace && stats.has_applied_rule() { - ctx.trace(format!("{}:", stage_name)); - ctx.trace(format!("{}", stats)); - ctx.trace(plan.explain_to_string().unwrap()); - } - - plan - } - - fn optimize_by_rules_until_fix_point( - &self, - plan: PlanRef, - stage_name: String, - rules: Vec, - apply_order: ApplyOrder, - ) -> PlanRef { - let mut output_plan = plan; - loop { - let mut heuristic_optimizer = HeuristicOptimizer::new(&apply_order, &rules); - output_plan = heuristic_optimizer.optimize(output_plan); - let stats = heuristic_optimizer.get_stats(); - - let ctx = output_plan.ctx(); - let explain_trace = ctx.is_explain_trace(); - if explain_trace && stats.has_applied_rule() { - ctx.trace(format!("{}:", stage_name)); - ctx.trace(format!("{}", stats)); - ctx.trace(output_plan.explain_to_string().unwrap()); - } - - if !stats.has_applied_rule() { - return output_plan; - } - } + /// Apply logical optimization to the plan for stream. + pub fn gen_optimized_logical_plan_for_stream(&self) -> Result { + LogicalOptimizer::gen_optimized_logical_plan_for_stream(self.plan.clone()) } - /// Apply logical optimization to the plan. - pub fn gen_optimized_logical_plan(&self) -> Result { - self.gen_optimized_logical_plan_inner(false) - } - - fn gen_optimized_logical_plan_inner(&self, for_stream: bool) -> Result { - let mut plan = self.plan.clone(); - let ctx = plan.ctx(); - let explain_trace = ctx.is_explain_trace(); - - if explain_trace { - ctx.trace("Begin:"); - ctx.trace(plan.explain_to_string().unwrap()); - } - - // If share plan is disable, we need to remove all the share operator generated by the - // binder, e.g. CTE and View. However, we still need to share source to ensure self - // source join can return correct result. - if ctx.session_ctx().config().get_enable_share_plan() && for_stream { - // Common sub-plan detection. - plan = plan.merge_eq_nodes(); - plan = plan.prune_share(); - if explain_trace { - ctx.trace("Merging equivalent nodes:"); - ctx.trace(plan.explain_to_string().unwrap()); - } - } else { - plan = self.optimize_by_rules( - plan, - "DAG To Tree".to_string(), - vec![DagToTreeRule::create()], - ApplyOrder::TopDown, - ); - - // Replace source to share source. - // Perform share source at the beginning so that we can benefit from predicate pushdown - // and column pruning for the share operator. - if for_stream { - plan = ShareSourceRewriter::share_source(plan); - if explain_trace { - ctx.trace("Share Source:"); - ctx.trace(plan.explain_to_string().unwrap()); - } - } - } - - plan = self.optimize_by_rules( - plan, - "Rewrite Like Expr".to_string(), - vec![RewriteLikeExprRule::create()], - ApplyOrder::TopDown, - ); - - // Simple Unnesting. - plan = self.optimize_by_rules( - plan, - "Simple Unnesting".to_string(), - vec![ - // Eliminate max one row - MaxOneRowEliminateRule::create(), - // Convert apply to join. - ApplyToJoinRule::create(), - // Pull correlated predicates up the algebra tree to unnest simple subquery. - PullUpCorrelatedPredicateRule::create(), - ], - ApplyOrder::TopDown, - ); - if HasMaxOneRowApply().visit(plan.clone()) { - return Err(ErrorCode::InternalError( - "Scalar subquery might produce more than one row.".into(), - ) - .into()); - } - - plan = self.optimize_by_rules( - plan, - "Union Merge".to_string(), - vec![UnionMergeRule::create()], - ApplyOrder::BottomUp, - ); - - // Predicate push down before translate apply, because we need to calculate the domain - // and predicate push down can reduce the size of domain. - plan = plan.predicate_pushdown( - Condition::true_cond(), - &mut PredicatePushdownContext::new(plan.clone()), - ); - if explain_trace { - ctx.trace("Predicate Push Down:"); - ctx.trace(plan.explain_to_string().unwrap()); - } - - // General Unnesting. - // Translate Apply, push Apply down the plan and finally replace Apply with regular inner - // join. - plan = self.optimize_by_rules( - plan, - "General Unnesting(Translate Apply)".to_string(), - vec![TranslateApplyRule::create()], - ApplyOrder::BottomUp, - ); - plan = self.optimize_by_rules_until_fix_point( - plan, - "General Unnesting(Push Down Apply)".to_string(), - vec![ - ApplyAggTransposeRule::create(), - ApplyFilterTransposeRule::create(), - ApplyProjectTransposeRule::create(), - ApplyJoinTransposeRule::create(), - ApplyShareEliminateRule::create(), - ApplyScanRule::create(), - ], - ApplyOrder::TopDown, - ); - if has_logical_apply(plan.clone()) { - return Err(ErrorCode::InternalError("Subquery can not be unnested.".into()).into()); - } - - // Predicate Push-down - plan = plan.predicate_pushdown( - Condition::true_cond(), - &mut PredicatePushdownContext::new(plan.clone()), - ); - if explain_trace { - ctx.trace("Predicate Push Down:"); - ctx.trace(plan.explain_to_string().unwrap()); - } - - // Merge inner joins and intermediate filters into multijoin - // This rule assumes that filters have already been pushed down near to - // their relevant joins. - plan = self.optimize_by_rules( - plan, - "To MultiJoin".to_string(), - vec![MergeMultiJoinRule::create()], - ApplyOrder::TopDown, - ); - - // Reorder multijoin into left-deep join tree. - plan = self.optimize_by_rules( - plan, - "Join Reorder".to_string(), - vec![ReorderMultiJoinRule::create()], - ApplyOrder::TopDown, - ); - - // Predicate Push-down: apply filter pushdown rules again since we pullup all join - // conditions into a filter above the multijoin. - plan = plan.predicate_pushdown( - Condition::true_cond(), - &mut PredicatePushdownContext::new(plan.clone()), - ); - if explain_trace { - ctx.trace("Predicate Push Down:"); - ctx.trace(plan.explain_to_string().unwrap()); - } - - // If for stream, push down predicates with now into a left-semi join - if for_stream { - plan = self.optimize_by_rules( - plan, - "Push down filter with now into a left semijoin".to_string(), - vec![FilterWithNowToJoinRule::create()], - ApplyOrder::TopDown, - ); - } - - // Push down the calculation of inputs of join's condition. - plan = self.optimize_by_rules( - plan, - "Push Down the Calculation of Inputs of Join's Condition".to_string(), - vec![PushCalculationOfJoinRule::create()], - ApplyOrder::TopDown, - ); - - // Prune Columns - // - // Currently, the expressions in ORDER BY will be merged into the expressions in SELECT and - // they shouldn't be a part of output columns, so we use `out_fields` to control the - // visibility of these expressions. To avoid these expressions being pruned, we can't use - // `self.out_fields` as `required_cols` here. - let required_cols = (0..self.plan.schema().len()).collect_vec(); - let mut column_pruning_ctx = ColumnPruningContext::new(plan.clone()); - plan = plan.prune_col(&required_cols, &mut column_pruning_ctx); - // Column pruning may introduce additional projects, and filter can be pushed again. - if explain_trace { - ctx.trace("Prune Columns:"); - ctx.trace(plan.explain_to_string().unwrap()); - } - - if column_pruning_ctx.need_second_round() { - // Second round of column pruning and reuse the column pruning context. - // Try to replace original share operator with the new one. - plan = plan.prune_col(&required_cols, &mut column_pruning_ctx); - if explain_trace { - ctx.trace("Prune Columns (For DAG):"); - ctx.trace(plan.explain_to_string().unwrap()); - } - } - - plan = plan.predicate_pushdown( - Condition::true_cond(), - &mut PredicatePushdownContext::new(plan.clone()), - ); - if explain_trace { - ctx.trace("Predicate Push Down:"); - ctx.trace(plan.explain_to_string().unwrap()); - } - - // Convert distinct aggregates. - plan = self.optimize_by_rules( - plan, - "Convert Distinct Aggregation".to_string(), - vec![ - UnionToDistinctRule::create(), - DistinctAggRule::create(for_stream), - ], - ApplyOrder::TopDown, - ); - - plan = self.optimize_by_rules( - plan, - "Join Commute".to_string(), - vec![JoinCommuteRule::create()], - ApplyOrder::TopDown, - ); - - plan = self.optimize_by_rules( - plan, - "Project Remove".to_string(), - vec![ - // merge should be applied before eliminate - ProjectMergeRule::create(), - ProjectEliminateRule::create(), - TrivialProjectToValuesRule::create(), - UnionInputValuesMergeRule::create(), - // project-join merge should be applied after merge - // eliminate and to values - ProjectJoinMergeRule::create(), - AggProjectMergeRule::create(), - ], - ApplyOrder::BottomUp, - ); - - plan = self.optimize_by_rules( - plan, - "Convert Window Aggregation".to_string(), - vec![ - OverAggToTopNRule::create(), - ProjectMergeRule::create(), - ProjectEliminateRule::create(), - TrivialProjectToValuesRule::create(), - UnionInputValuesMergeRule::create(), - ], - ApplyOrder::TopDown, - ); - - if has_logical_over_agg(plan.clone()) { - return Err(ErrorCode::InternalError(format!( - "OverAgg can not be transformed. Plan:\n{}", - plan.explain_to_string().unwrap() - )) - .into()); - } - - plan = self.optimize_by_rules( - plan, - "Dedup Group keys".to_string(), - vec![AggDedupGroupKeyRule::create()], - ApplyOrder::TopDown, - ); - - #[cfg(debug_assertions)] - InputRefValidator.validate(plan.clone()); - - if ctx.is_explain_logical() { - ctx.store_logical(plan.explain_to_string().unwrap()); - } - - Ok(plan) + /// Apply logical optimization to the plan for batch. + pub fn gen_optimized_logical_plan_for_batch(&self) -> Result { + LogicalOptimizer::gen_optimized_logical_plan_for_batch(self.plan.clone()) } /// Optimize and generate a singleton batch physical plan without exchange nodes. fn gen_batch_plan(&mut self) -> Result { // Logical optimization - let mut plan = self.gen_optimized_logical_plan()?; - - // Convert the dag back to the tree, because we don't support physical dag plan for now. - plan = self.optimize_by_rules( - plan, - "DAG To Tree".to_string(), - vec![DagToTreeRule::create()], - ApplyOrder::TopDown, - ); - - plan = self.optimize_by_rules( - plan, - "Agg on Index".to_string(), - vec![TopNOnIndexRule::create()], - ApplyOrder::TopDown, - ); + let mut plan = self.gen_optimized_logical_plan_for_batch()?; // Convert to physical plan node plan = plan.to_batch_with_order_required(&self.required_order)?; @@ -621,10 +284,6 @@ impl PlanRoot { Ok(plan) } - pub fn gen_optimized_logical_plan_for_stream(&self) -> Result { - self.gen_optimized_logical_plan_inner(true) - } - /// Generate create index or create materialize view plan. fn gen_stream_plan(&mut self) -> Result { let ctx = self.plan.ctx(); @@ -661,12 +320,11 @@ impl PlanRoot { if ctx.session_ctx().config().get_streaming_enable_delta_join() { // TODO: make it a logical optimization. // Rewrite joins with index to delta join - plan = self.optimize_by_rules( - plan, - "To IndexDeltaJoin".to_string(), + plan = plan.optimize_by_rules(&OptimizationStage::new( + "To IndexDeltaJoin", vec![IndexDeltaJoinRule::create()], ApplyOrder::BottomUp, - ); + )); } // Inline session timezone diff --git a/src/frontend/src/optimizer/rule/translate_apply_rule.rs b/src/frontend/src/optimizer/rule/translate_apply_rule.rs index 5de1fe84bbbb..e78b4ed9f7f8 100644 --- a/src/frontend/src/optimizer/rule/translate_apply_rule.rs +++ b/src/frontend/src/optimizer/rule/translate_apply_rule.rs @@ -48,7 +48,10 @@ use crate::utils::{ColIndexMapping, Condition}; /// / \ /// Domain RHS /// ``` -pub struct TranslateApplyRule {} +pub struct TranslateApplyRule { + enable_share_plan: bool, +} + impl Rule for TranslateApplyRule { fn apply(&self, plan: PlanRef) -> Option { let apply: &LogicalApply = plan.as_logical_apply()?; @@ -96,7 +99,7 @@ impl Rule for TranslateApplyRule { // the domain. Distinct + Project + The Left of Apply // Use Share - left = if left.ctx().session_ctx().config().get_enable_share_plan() { + left = if self.enable_share_plan { let logical_share = LogicalShare::new(left); logical_share.into() } else { @@ -136,8 +139,8 @@ impl Rule for TranslateApplyRule { } impl TranslateApplyRule { - pub fn create() -> BoxedRule { - Box::new(TranslateApplyRule {}) + pub fn create(enable_share_plan: bool) -> BoxedRule { + Box::new(TranslateApplyRule { enable_share_plan }) } /// Rewrite `LogicalApply`'s left according to `correlated_indices`.