Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt: split a disjunction of equijoin predicates into a union of joins #74303

Merged
merged 1 commit into from
Apr 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
968 changes: 968 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/disjunction_in_join

Large diffs are not rendered by default.

79 changes: 79 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inner-join
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,33 @@ SELECT * from abc WHERE EXISTS (SELECT * FROM def WHERE a=d AND c=e)
1 1 2
2 1 1

# The four tests below use the same correlated predicate, a disjunction of two
# equality comparisons (a=d OR a=e), under EXISTS and NOT EXISTS. rowsort
# compares sorted rows, so the order of the expected rows is not significant.

# EXISTS with primary key columns selected.
query III rowsort
SELECT a, b, c FROM abc WHERE EXISTS (SELECT * FROM def WHERE a=d OR a=e)
----
1 1 2
2 1 1
2 2 NULL

# EXISTS with primary key columns not selected: only column c is projected, so
# the three expected values are the c values of the three rows returned above.
query I rowsort
SELECT c FROM abc WHERE EXISTS (SELECT * FROM def WHERE a=d OR a=e)
----
2
1
NULL

# NOT EXISTS with primary key columns selected. The expected output is empty,
# i.e. every row of abc has at least one match under a=d OR a=e.
query III rowsort
SELECT a, b, c FROM abc WHERE NOT EXISTS (SELECT * FROM def WHERE a=d OR a=e)
----

# NOT EXISTS with primary key columns not selected. Also expected to be empty.
query I rowsort
SELECT c FROM abc WHERE NOT EXISTS (SELECT * FROM def WHERE a=d OR a=e)
----


# A semi-join emits exactly one row for every matching row in the LHS.
# The following test ensures that the SemiJoin doesn't commute into an
# InnerJoin as that guarantee would be lost.
Expand All @@ -38,11 +65,28 @@ INSERT INTO abc VALUES (1, 1, 1)
# Load two rows into def. The tests below expect an abc row to be returned
# once by EXISTS even when more than one def row satisfies the predicate.
statement ok
INSERT INTO def VALUES (1, 1, 1), (2, 1, 1)

# EXISTS with primary key columns selected: exactly one output row is expected.
query III rowsort
SELECT a, b, c FROM abc WHERE EXISTS (SELECT * FROM def WHERE a=d OR a=e)
----
1 1 1

# EXISTS with primary key columns not selected: still exactly one output row,
# even though only column c is projected.
query I rowsort
SELECT c FROM abc WHERE EXISTS (SELECT * FROM def WHERE a=d OR a=e)
----
1

# NOT EXISTS with primary key columns selected. The expected output is empty
# because the row returned by the EXISTS tests above has a match.
query III rowsort
SELECT a, b, c FROM abc WHERE NOT EXISTS (SELECT * FROM def WHERE a=d OR a=e)
----

# NOT EXISTS with primary key columns not selected. Also expected to be empty.
query I rowsort
SELECT c FROM abc WHERE NOT EXISTS (SELECT * FROM def WHERE a=d OR a=e)
----

# Given that we know the reason the above query would fail if an InnerJoin
# was used - multiple rows emitted for each matching row in the LHS - we
# might think that adding a DistinctOn over the InnerJoin would help.
Expand All @@ -58,6 +102,21 @@ SELECT a, b, c FROM abc WHERE EXISTS (SELECT * FROM def WHERE a=d OR a=e)
#
# This tests that the InnerJoin commute rule for semi joins behaves sanely in
# these cases.

# InnerJoin with primary key columns selected. Unlike the EXISTS form, the join
# produces one output row per matching (abc, def) pair, so the same abc values
# are expected twice.
query III rowsort
SELECT a, b, c FROM abc, def WHERE a=d OR a=e
----
1 1 1
1 1 1

# InnerJoin with primary key columns not selected. The duplicate must be kept
# even though only column c is projected.
query I rowsort
SELECT c FROM abc, def WHERE a=d OR a=e
----
1
1

# Create abc_decimal and load four rows: two identical (1, 1, 1) rows plus rows
# written as 1.0 and 1.00, i.e. the same numeric value with different scales.
statement ok
CREATE TABLE abc_decimal (a DECIMAL, b DECIMAL, c DECIMAL);
INSERT INTO abc_decimal VALUES (1, 1, 1), (1, 1, 1), (1.0, 1.0, 1.0), (1.00, 1.00, 1.00)
Expand All @@ -73,3 +132,23 @@ SELECT a, b, c FROM abc_decimal WHERE EXISTS (SELECT * FROM def_decimal WHERE a:
1 1 1
1.0 1.0 1.0
1.00 1.00 1.00

# Disjunction of equality predicates over the ::string casts of the decimal
# columns, so the comparison is made on the string form of each value rather
# than on the decimal itself. All four abc_decimal rows are expected, including
# the duplicated (1, 1, 1) row.
query RRR rowsort
SELECT a, b, c FROM abc_decimal WHERE EXISTS (SELECT * FROM def_decimal WHERE a::string=d::string OR a::string=e::string)
----
1 1 1
1 1 1
1.0 1.0 1.0
1.00 1.00 1.00

# Same predicate as a plain join between the two tables.
query RRR rowsort
SELECT a, b, c FROM abc_decimal, def_decimal WHERE a::string=d::string OR a::string=e::string
----
1 1 1
1 1 1
1.0 1.0 1.0
1.00 1.00 1.00

# NOT EXISTS with the same predicate. The expected output is empty because the
# EXISTS test above returns every abc_decimal row.
query RRR rowsort
SELECT a, b, c FROM abc_decimal WHERE NOT EXISTS (SELECT * FROM def_decimal WHERE a::string=d::string OR a::string=e::string)
----
115 changes: 115 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/subquery
Original file line number Diff line number Diff line change
Expand Up @@ -550,3 +550,118 @@ WHERE
t.oid
NOT IN (SELECT (ARRAY[704, 11676, 10005, 3912, 11765, 59410, 11397])[i] FROM generate_series(1, 376) AS i)
----

# Inject statistics for columns a and b of abc in a single statement.
# INJECT STATISTICS replaces all existing statistics of the table, so issuing
# one statement per column would leave only the last column's statistics in
# place.
statement ok
ALTER TABLE abc INJECT STATISTICS '[
  {
    "columns": ["a"],
    "created_at": "2018-05-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 10000
  },
  {
    "columns": ["b"],
    "created_at": "2018-05-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 10000
  }
]'

# Inject statistics for columns x and y of xyz in a single statement.
# INJECT STATISTICS replaces all existing statistics of the table, so issuing
# one statement per column would leave only the last column's statistics in
# place.
statement ok
ALTER TABLE xyz INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2018-05-01 1:00:00.00000+00:00",
    "row_count": 1000,
    "distinct_count": 1000
  },
  {
    "columns": ["y"],
    "created_at": "2018-05-01 1:00:00.00000+00:00",
    "row_count": 1000,
    "distinct_count": 1000
  }
]'

# Add one extra row to each table for the split-disjunction tests below.
statement ok
INSERT INTO xyz VALUES(5, 4, 7)

statement ok
INSERT INTO abc VALUES(12, 13, 14)

# Secondary indexes on abc.b and xyz.y, two of the columns compared in the
# disjunctions of the tests below.
# NOTE(review): presumably these allow each disjunct to be served by an index
# once the disjunction is split; confirm against the optimizer rule.
statement ok
CREATE INDEX abc_b ON abc(b)

statement ok
CREATE INDEX xyz_y ON xyz(y)

### Split Disjunctions Tests

# EXISTS with a disjunction of two equality predicates between abc and xyz.
query III rowsort
SELECT * FROM abc WHERE EXISTS (SELECT * FROM xyz WHERE abc.a = xyz.x OR abc.b = xyz.y)
----
4 5 6
7 8 9
2 5 6

# Same shape with the xyz columns swapped between the two disjuncts.
query III rowsort
SELECT * FROM abc WHERE EXISTS (SELECT * FROM xyz WHERE abc.a = xyz.y OR abc.b = xyz.x)
----
2 5 6
4 5 6
12 13 14

# Disjunction combined with additional single-table filters on abc and xyz.
query III rowsort
SELECT * FROM abc WHERE EXISTS (SELECT * FROM xyz WHERE (abc.a = xyz.x OR abc.b = xyz.y) AND abc.a > 3 AND xyz.z > 10)
----
7 8 9

# Swapped-column disjunction combined with the same additional filters.
query III rowsort
SELECT * FROM abc WHERE EXISTS (SELECT * FROM xyz WHERE (abc.a = xyz.y OR abc.b = xyz.x) AND abc.a > 3 AND xyz.z > 10)
----
12 13 14

# NOT EXISTS counterparts of the EXISTS tests above. Each expected result is
# the set of abc rows that the corresponding EXISTS query does not return.

# NOT EXISTS with a disjunction of two equality predicates.
query III rowsort
SELECT * FROM abc WHERE NOT EXISTS (SELECT * FROM xyz WHERE abc.a = xyz.x OR abc.b = xyz.y)
----
12 13 14

# Same shape with the xyz columns swapped between the two disjuncts.
query III rowsort
SELECT * FROM abc WHERE NOT EXISTS (SELECT * FROM xyz WHERE abc.a = xyz.y OR abc.b = xyz.x)
----
7 8 9

# Disjunction combined with additional single-table filters on abc and xyz.
query III rowsort
SELECT * FROM abc WHERE NOT EXISTS (SELECT * FROM xyz WHERE (abc.a = xyz.x OR abc.b = xyz.y) AND abc.a > 3 AND xyz.z > 10)
----
2 5 6
4 5 6
12 13 14

# Swapped-column disjunction combined with the same additional filters.
query III rowsort
SELECT * FROM abc WHERE NOT EXISTS (SELECT * FROM xyz WHERE (abc.a = xyz.y OR abc.b = xyz.x) AND abc.a > 3 AND xyz.z > 10)
----
2 5 6
4 5 6
7 8 9

# EXISTS with a conjunction of two disjunctions of equality predicates.
# NOTE(review): abc.b = xyz.y appears in both disjunctions; confirm whether the
# second one was meant to read abc.b = xyz.x. The expected rows below match the
# query as written.
query III rowsort
SELECT * FROM abc WHERE EXISTS (SELECT * FROM xyz WHERE (abc.a = xyz.x OR abc.b = xyz.y) AND (abc.a = xyz.y OR abc.b = xyz.y))
----
4 5 6
2 5 6

# NOT EXISTS counterpart: expects the abc rows not returned by the query above.
query III rowsort
SELECT * FROM abc WHERE NOT EXISTS (SELECT * FROM xyz WHERE (abc.a = xyz.x OR abc.b = xyz.y) AND (abc.a = xyz.y OR abc.b = xyz.y))
----
7 8 9
12 13 14

### End Split Disjunctions Tests
Loading