Skip to content

Commit

Permalink
opt: inline constant values for FK and uniqueness checks
Browse files Browse the repository at this point in the history
In FK and uniqueness checks, WithScans that buffer the mutation's input
are replaced with a Values expression when the inserted values are constant.
This is especially beneficial for `REGIONAL BY ROW` tables where the
`crdb_region` column is a computed column dependent on a FK. Because the
constant values are inlined, the optimizer is now able to replace a FK
check that scans multiple regions with a FK check that checks only a
single region.

Informs #63882

Release note (performance improvement): Previously, foreign key checks
performed for inserts into `REGIONAL BY ROW` tables always searched the
parent table in all regions to check for FK violations. Now, only a
single region is searched if constant values are inserted and the
`crdb_region` column is a computed column dependent on the FK column.
  • Loading branch information
mgartner committed May 13, 2021
1 parent 79fe0c7 commit 79293dc
Show file tree
Hide file tree
Showing 21 changed files with 1,091 additions and 808 deletions.
17 changes: 12 additions & 5 deletions pkg/ccl/logictestccl/testdata/logic_test/multi_region_tpcc
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ EXPLAIN INSERT
INTO
history (h_c_id, h_c_d_id, h_c_w_id, h_d_id, h_w_id, h_amount, h_date, h_data)
VALUES
(2057, 4, 3, 4, 3, 2100.9, '2021-04-15 15:22:14', '9 zmssaF9m')
(2057, 4, 3, 4, 3, 2100.9, '2021-04-15 15:22:14', '9 zmssaF9m'),
(2058, 4, 3, 4, 3, 2100.9, '2021-04-15 15:22:14', '9 zmssaF9m')
----
distribution: local
vectorized: true
Expand All @@ -204,8 +205,14 @@ vectorized: true
│ └── • buffer
│ │ label: buffer 1
│ │
│ └── • values
│ size: 11 columns, 1 row
│ └── • render
│ │ estimated row count: 2
│ │
│ └── • render
│ │ estimated row count: 2
│ │
│ └── • values
│ size: 8 columns, 2 rows
├── • constraint-check
│ │
Expand All @@ -218,7 +225,7 @@ vectorized: true
│ │ lookup condition: (((column3 = c_w_id) AND (column2 = c_d_id)) AND (column1 = c_id)) AND (crdb_region IN ('ca-central-1', 'us-east-1'))
│ │
│ └── • lookup join (anti)
│ │ estimated row count: 1
│ │ estimated row count: 2
│ │ table: customer@primary
│ │ equality cols are key
│ │ lookup condition: (((column3 = c_w_id) AND (column2 = c_d_id)) AND (column1 = c_id)) AND (crdb_region = 'ap-southeast-2')
Expand All @@ -237,7 +244,7 @@ vectorized: true
│ lookup condition: ((column5 = d_w_id) AND (column4 = d_id)) AND (crdb_region IN ('ca-central-1', 'us-east-1'))
└── • lookup join (anti)
│ estimated row count: 1
│ estimated row count: 2
│ table: district@primary
│ equality cols are key
│ lookup condition: ((column5 = d_w_id) AND (column4 = d_id)) AND (crdb_region = 'ap-southeast-2')
Expand Down
124 changes: 51 additions & 73 deletions pkg/ccl/logictestccl/testdata/logic_test/partitioning_implicit
Original file line number Diff line number Diff line change
Expand Up @@ -238,44 +238,42 @@ vectorized: true
├── • insert
│ │ into: fk_using_implicit_columns_against_t(pk, ref_t_pk, ref_t_c)
│ │
│ └── • buffer
│ │ label: buffer 1
│ │
│ └── • values
│ size: 3 columns, 1 row
│ └── • values
│ size: 3 columns, 1 row
├── • constraint-check
│ │
│ └── • error if rows
│ │
│ └── • hash join (right anti)
│ │ equality: (pk) = (column2)
│ │ left cols are key
│ │ right cols are key
│ └── • cross join (anti)
│ │
│ ├── • scan
│ │ missing stats
│ │ table: t@t_b_idx
│ │ spans: FULL SCAN
│ ├── • values
│ │ size: 1 column, 1 row
│ │
│ └── • scan buffer
│ label: buffer 1
│ └── • filter
│ │ filter: pk = 1
│ │
│ └── • scan
│ missing stats
│ table: t@primary
│ spans: [ - /0/1] [/1/1 - /1/1] [/2/1 - ]
└── • constraint-check
└── • error if rows
└── • hash join (right anti)
│ equality: (c) = (column3)
│ right cols are key
└── • cross join (anti)
├── • scan
│ missing stats
│ table: t@t_c_key
│ spans: FULL SCAN
├── • values
│ size: 1 column, 1 row
└── • scan buffer
label: buffer 1
└── • filter
│ filter: c = 4
└── • scan
missing stats
table: t@t_c_key
spans: [ - /2/4] [/3/4 - /3/4] [/4/4 - ]

statement ok
INSERT INTO fk_using_implicit_columns_against_t VALUES (1, 1, 4)
Expand Down Expand Up @@ -701,74 +699,54 @@ vectorized: true
├── • insert
│ │ into: t(pk, pk2, partition_by, a, b, c, d)
│ │
│ └── • buffer
│ │ label: buffer 1
│ │
│ └── • values
│ size: 8 columns, 1 row
│ └── • values
│ size: 8 columns, 1 row
├── • constraint-check
│ │
│ └── • error if rows
│ │
│ └── • hash join (right semi)
│ │ equality: (pk) = (column1)
│ │ right cols are key
│ │ pred: column3 != partition_by
│ └── • cross join
│ │
│ ├── • scan
│ │ missing stats
│ │ table: t@t_a_idx
│ │ spans: FULL SCAN
│ ├── • values
│ │ size: 1 column, 1 row
│ │
│ └── • scan buffer
│ label: buffer 1
│ └── • limit
│ │ count: 1
│ │
│ └── • filter
│ │ filter: pk = 1
│ │
│ └── • scan
│ missing stats
│ table: t@t_a_idx
│ spans: [ - /0] [/2 - ]
├── • constraint-check
│ │
│ └── • error if rows
│ │
│ └── • hash join (right semi)
│ │ equality: (b) = (column5)
│ │ right cols are key
│ │ pred: (column1 != pk) OR (column3 != partition_by)
│ └── • cross join
│ │
│ ├── • scan
│ │ missing stats
│ │ table: t@t_b_key
│ │ spans: FULL SCAN
│ ├── • values
│ │ size: 1 column, 1 row
│ │
│ └── • scan buffer
│ label: buffer 1
│ └── • limit
│ │ count: 1
│ │
│ └── • filter
│ │ filter: (b = 1) AND ((pk != 1) OR (partition_by != 1))
│ │
│ └── • scan
│ missing stats
│ table: t@t_b_key
│ spans: FULL SCAN
└── • constraint-check
└── • error if rows
└── • limit
│ count: 1
└── • lookup join
│ table: t@primary
│ equality: (partition_by, pk) = (partition_by,pk)
│ equality cols are key
└── • hash join
│ equality: (c) = (column6)
│ right cols are key
│ pred: (column1 != pk) OR (column3 != partition_by)
├── • scan
│ missing stats
│ table: t@t_c_key (partial index)
│ spans: FULL SCAN
└── • filter
│ estimated row count: 1
│ filter: column7 > 100
└── • scan buffer
label: buffer 1
└── • norows

statement ok
INSERT INTO t VALUES (1, 1, 1, 1, 1, 1, 1), (2, 2, 2, 2, 2, 2, 2)
Expand Down
97 changes: 77 additions & 20 deletions pkg/ccl/logictestccl/testdata/logic_test/regional_by_row
Original file line number Diff line number Diff line change
Expand Up @@ -930,18 +930,23 @@ SELECT * FROM [EXPLAIN INSERT INTO child VALUES (1, 1)] OFFSET 2
└── • error if rows
└── • lookup join (anti)
table: parent@primary
│ equality cols are key
lookup condition: (column2 = p_id) AND (crdb_region IN ('ca-central-1', 'us-east-1'))
└── • cross join (anti)
├── • values
size: 1 column, 1 row
└── • lookup join (anti)
│ table: parent@primary
│ equality cols are key
│ lookup condition: (column2 = p_id) AND (crdb_region = 'ap-southeast-2')
└── • union all
│ limit: 1
└── • scan buffer
label: buffer 1
├── • scan
│ missing stats
│ table: parent@primary
│ spans: [/'ap-southeast-2'/1 - /'ap-southeast-2'/1]
└── • scan
missing stats
table: parent@primary
spans: [/'ca-central-1'/1 - /'ca-central-1'/1] [/'us-east-1'/1 - /'us-east-1'/1]

query T
SELECT * FROM [EXPLAIN UPSERT INTO child VALUES (1, 1)] OFFSET 2
Expand Down Expand Up @@ -980,18 +985,23 @@ SELECT * FROM [EXPLAIN UPSERT INTO child VALUES (1, 1)] OFFSET 2
└── • error if rows
└── • lookup join (anti)
table: parent@primary
│ equality cols are key
lookup condition: (column2 = p_id) AND (crdb_region IN ('ca-central-1', 'us-east-1'))
└── • cross join (anti)
├── • values
size: 1 column, 1 row
└── • lookup join (anti)
│ table: parent@primary
│ equality cols are key
│ lookup condition: (column2 = p_id) AND (crdb_region = 'ap-southeast-2')
└── • union all
│ limit: 1
└── • scan buffer
label: buffer 1
├── • scan
│ missing stats
│ table: parent@primary
│ spans: [/'ap-southeast-2'/1 - /'ap-southeast-2'/1]
└── • scan
missing stats
table: parent@primary
spans: [/'ca-central-1'/1 - /'ca-central-1'/1] [/'us-east-1'/1 - /'us-east-1'/1]

# We don't yet support locality optimized search for semi join.
query T
Expand Down Expand Up @@ -1035,6 +1045,53 @@ SELECT * FROM [EXPLAIN DELETE FROM parent WHERE p_id = 1] OFFSET 2
└── • scan buffer
label: buffer 1

# An insert FK check only needs to search a single region when the region is a
# computed column dependent on the FK column and a constant value is inserted.
statement ok
CREATE TABLE parent_comp (
pk INT PRIMARY KEY,
crdb_region crdb_internal_region AS (
CASE WHEN pk <= 10 THEN 'us-east-1' ELSE 'ap-southeast-2' END
) STORED,
FAMILY (pk, crdb_region)
) LOCALITY REGIONAL BY ROW;

statement ok
CREATE TABLE child_comp (
pk INT PRIMARY KEY,
p_pk INT REFERENCES parent_comp (pk),
crdb_region crdb_internal_region AS (
CASE WHEN pk <= 10 THEN 'us-east-1' ELSE 'ap-southeast-2' END
) STORED,
FAMILY (pk, p_pk, crdb_region)
) LOCALITY REGIONAL BY ROW;

query T
SELECT * FROM [EXPLAIN INSERT INTO child_comp VALUES (1, 1)] OFFSET 2
----
·
• root
├── • insert
│ │ into: child_comp(pk, p_pk, crdb_region)
│ │
│ └── • values
│ size: 4 columns, 1 row
└── • constraint-check
└── • error if rows
└── • cross join (anti)
├── • values
│ size: 1 column, 1 row
└── • scan
missing stats
table: parent_comp@primary
spans: [/'us-east-1'/1 - /'us-east-1'/1]

# Tests creating a index and a unique constraint on a REGIONAL BY ROW table.
statement ok
CREATE INDEX new_idx ON regional_by_row_table(a, b)
Expand Down
46 changes: 46 additions & 0 deletions pkg/sql/opt/memo/expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,52 @@ func (prj *ProjectExpr) InternalFDs() *props.FuncDepSet {
return &prj.internalFuncDeps
}

// FindInlinableConstants collects the columns of the given input expression
// whose values are produced by constant value expressions (ConstOp, TrueOp,
// FalseOp, or NullOp). Such expressions can often be inlined into the
// expressions that reference them.
//
// Only two kinds of input are inspected, since only Project and Values
// operators synthesize constant value expressions:
//   - a Project: every projection item whose element is a constant value
//     contributes its column;
//   - a Values with exactly one row: every constant element of that row
//     contributes the corresponding Values column.
//
// For any other input (including a Values with zero or multiple rows), the
// returned set is empty.
func FindInlinableConstants(input RelExpr) opt.ColSet {
	var constCols opt.ColSet
	switch t := input.(type) {
	case *ProjectExpr:
		for i := range t.Projections {
			if p := &t.Projections[i]; opt.IsConstValueOp(p.Element) {
				constCols.Add(p.Col)
			}
		}

	case *ValuesExpr:
		// Only a single-row Values is considered.
		if len(t.Rows) != 1 {
			break
		}
		row := t.Rows[0].(*TupleExpr)
		for idx := range row.Elems {
			if opt.IsConstValueOp(row.Elems[idx]) {
				constCols.Add(t.Cols[idx])
			}
		}
	}
	return constCols
}

// ExtractColumnFromProjectOrValues returns the scalar expression that
// produces the column with the given id in a Project or Values input.
//
// For a Project, the projection items are searched for one whose column
// matches. For a Values, only an expression with exactly one row is searched,
// and the element of that row at the position of the matching column is
// returned.
//
// The function panics with an assertion failure if the column cannot be
// found, which includes the cases where the input is neither a Project nor a
// single-row Values.
func ExtractColumnFromProjectOrValues(input RelExpr, col opt.ColumnID) opt.ScalarExpr {
	switch t := input.(type) {
	case *ProjectExpr:
		for i := range t.Projections {
			if p := &t.Projections[i]; p.Col == col {
				return p.Element
			}
		}

	case *ValuesExpr:
		// Only a single-row Values is searched; anything else falls through
		// to the assertion failure below.
		if len(t.Rows) != 1 {
			break
		}
		row := t.Rows[0].(*TupleExpr)
		for idx := range row.Elems {
			if t.Cols[idx] == col {
				return row.Elems[idx]
			}
		}
	}
	panic(errors.AssertionFailedf("could not find column to extract"))
}

// ExprIsNeverNull makes a best-effort attempt to prove that the provided
// scalar is always non-NULL, given the set of outer columns that are known
// to be not null. This is particularly useful with check constraints.
Expand Down
Loading

0 comments on commit 79293dc

Please sign in to comment.