From 503ef79bb92fb387efbd8aee7819f879a0c7504b Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Sat, 14 Nov 2020 18:26:06 +1100 Subject: [PATCH 1/6] packer: tweak timeouts Tweak timeouts for slower connections and the abundance of more tests. Release note: None --- build/packer/teamcity-agent.json | 3 ++- build/packer/teamcity-agent.sh | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/build/packer/teamcity-agent.json b/build/packer/teamcity-agent.json index e28d2813a4f9..99ae97e58771 100644 --- a/build/packer/teamcity-agent.json +++ b/build/packer/teamcity-agent.json @@ -13,7 +13,8 @@ "image_description": "{{user `image_id`}}", "ssh_username": "packer", "disk_size": 256, - "disk_type": "pd-ssd" + "disk_type": "pd-ssd", + "state_timeout": "15m" }], "provisioners": [{ diff --git a/build/packer/teamcity-agent.sh b/build/packer/teamcity-agent.sh index 860ff15e01f3..764985d74f74 100644 --- a/build/packer/teamcity-agent.sh +++ b/build/packer/teamcity-agent.sh @@ -87,7 +87,7 @@ do git clean -dxf git checkout "$branch" - COCKROACH_BUILDER_CCACHE=1 build/builder.sh make test testrace TESTS=- + COCKROACH_BUILDER_CCACHE=1 build/builder.sh make test testrace TESTTIMEOUT=45m TESTS=- # TODO(benesch): store the acceptanceversion somewhere more accessible. docker pull $(git grep cockroachdb/acceptance -- '*.go' | sed -E 's/.*"([^"]*).*"/\1/') || true done From dcd0f80e2606322949beaf4c195680aea988d5c5 Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Wed, 18 Nov 2020 13:08:31 +1100 Subject: [PATCH 2/6] sql: rename SURVIVE AVAILABILITY ZONE FAILURE to SURVIVE ZONE FAILURE Release note (sql change): SURVIVE AVAILABILITY ZONE FAILURE is now SURVIVE ZONE FAILURE. --- docs/generated/sql/bnf/stmt_block.bnf | 3 +-- pkg/sql/logictest/testdata/logic_test/multiregion | 2 +- pkg/sql/parser/parse_test.go | 4 ++-- pkg/sql/parser/sql.y | 7 +++---- pkg/sql/sem/tree/survive.go | 8 ++++---- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/docs/generated/sql/bnf/stmt_block.bnf b/docs/generated/sql/bnf/stmt_block.bnf index 18c33ac28019..4406ab67a78e 100644 --- a/docs/generated/sql/bnf/stmt_block.bnf +++ b/docs/generated/sql/bnf/stmt_block.bnf @@ -774,7 +774,6 @@ unreserved_keyword ::= | 'AT' | 'ATTRIBUTE' | 'AUTOMATIC' - | 'AVAILABILITY' | 'BACKUP' | 'BACKUPS' | 'BEFORE' @@ -2143,7 +2142,7 @@ region_name ::= survive_clause ::= 'SURVIVE' 'REGION' 'FAILURE' - | 'SURVIVE' 'AVAILABILITY' 'ZONE' 'FAILURE' + | 'SURVIVE' 'ZONE' 'FAILURE' | 'SURVIVE' 'DEFAULT' role_option ::= diff --git a/pkg/sql/logictest/testdata/logic_test/multiregion b/pkg/sql/logictest/testdata/logic_test/multiregion index 3363ff8cd18d..aa0aac63d7bc 100644 --- a/pkg/sql/logictest/testdata/logic_test/multiregion +++ b/pkg/sql/logictest/testdata/logic_test/multiregion @@ -21,7 +21,7 @@ statement error region "test1" defined multiple times CREATE DATABASE duplicate_region_name_db REGIONS "test1", "test1" statement error implementation pending -CREATE DATABASE new_db SURVIVE AVAILABILITY ZONE FAILURE +CREATE DATABASE new_db SURVIVE ZONE FAILURE statement ok CREATE DATABASE new_db diff --git a/pkg/sql/parser/parse_test.go b/pkg/sql/parser/parse_test.go index 5b79f3807d78..8ab86f0b990b 100644 --- a/pkg/sql/parser/parse_test.go +++ b/pkg/sql/parser/parse_test.go @@ -73,7 +73,7 @@ func TestParse(t *testing.T) { {`CREATE DATABASE a CONNECTION LIMIT = 13`}, {`CREATE DATABASE a REGIONS = "us-west-1", "us-west-2"`}, {`CREATE DATABASE a SURVIVE REGION FAILURE`}, - {`CREATE DATABASE a SURVIVE AVAILABILITY ZONE FAILURE`}, + {`CREATE DATABASE a 
SURVIVE ZONE FAILURE`}, {`CREATE DATABASE IF NOT EXISTS a`}, {`CREATE DATABASE IF NOT EXISTS a TEMPLATE = 'template0'`}, {`CREATE DATABASE IF NOT EXISTS a TEMPLATE = 'invalid'`}, @@ -86,7 +86,7 @@ func TestParse(t *testing.T) { {`CREATE DATABASE IF NOT EXISTS a TEMPLATE = 'template0' ENCODING = 'UTF8' LC_COLLATE = 'C.UTF-8' LC_CTYPE = 'INVALID'`}, {`CREATE DATABASE IF NOT EXISTS a REGIONS = "us-west-1", "us-west-2"`}, {`CREATE DATABASE IF NOT EXISTS a SURVIVE REGION FAILURE`}, - {`CREATE DATABASE IF NOT EXISTS a SURVIVE AVAILABILITY ZONE FAILURE`}, + {`CREATE DATABASE IF NOT EXISTS a SURVIVE ZONE FAILURE`}, {`CREATE SCHEMA IF NOT EXISTS foo`}, {`CREATE SCHEMA foo`}, diff --git a/pkg/sql/parser/sql.y b/pkg/sql/parser/sql.y index a60da8f9daa4..048e19d061d9 100644 --- a/pkg/sql/parser/sql.y +++ b/pkg/sql/parser/sql.y @@ -596,7 +596,7 @@ func (u *sqlSymUnion) objectNamePrefixList() tree.ObjectNamePrefixList { // Ordinary key words in alphabetical order. %token ABORT ACCESS ACTION ADD ADMIN AFFINITY AFTER AGGREGATE %token ALL ALTER ALWAYS ANALYSE ANALYZE AND AND_AND ANY ANNOTATE_TYPE ARRAY AS ASC -%token ASYMMETRIC AT ATTRIBUTE AUTHORIZATION AUTOMATIC AVAILABILITY +%token ASYMMETRIC AT ATTRIBUTE AUTHORIZATION AUTOMATIC %token BACKUP BACKUPS BEFORE BEGIN BETWEEN BIGINT BIGSERIAL BINARY BIT %token BUCKET_COUNT @@ -7592,9 +7592,9 @@ survive_clause: { $$.val = tree.SurviveRegionFailure } -| SURVIVE AVAILABILITY ZONE FAILURE +| SURVIVE ZONE FAILURE { - $$.val = tree.SurviveAvailabilityZoneFailure + $$.val = tree.SurviveZoneFailure } | SURVIVE DEFAULT { @@ -11861,7 +11861,6 @@ unreserved_keyword: | AT | ATTRIBUTE | AUTOMATIC -| AVAILABILITY | BACKUP | BACKUPS | BEFORE diff --git a/pkg/sql/sem/tree/survive.go b/pkg/sql/sem/tree/survive.go index d5133a9d188a..48a46ba5d005 100644 --- a/pkg/sql/sem/tree/survive.go +++ b/pkg/sql/sem/tree/survive.go @@ -20,9 +20,9 @@ const ( // SurviveRegionFailure indicates a database being able to withstand // an entire region failure. SurviveRegionFailure - // SurviveAvailabilityZoneFailure indicates a database being able to + // SurviveyZoneFailure indicates a database being able to // withstand a failure of an availibility zone. - SurviveAvailabilityZoneFailure + SurviveZoneFailure ) // Format implements the NodeFormatter interface. @@ -30,8 +30,8 @@ func (node *Survive) Format(ctx *FmtCtx) { switch *node { case SurviveRegionFailure: ctx.WriteString("SURVIVE REGION FAILURE") - case SurviveAvailabilityZoneFailure: - ctx.WriteString("SURVIVE AVAILABILITY ZONE FAILURE") + case SurviveZoneFailure: + ctx.WriteString("SURVIVE ZONE FAILURE") case SurviveDefault: ctx.WriteString("SURVIVE DEFAULT") } From 37e64de5fdbf0a44a65c3e99204e511bb6f47f64 Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Wed, 18 Nov 2020 16:47:41 +1100 Subject: [PATCH 3/6] sql: remove SURVIVE DEFAULT; rename Survive to SurvivalGoal We still need a concept of "default" for tree.SurviveDefault so that we do not display it syntactically, but the value will be transformed before it is persisted. 
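For illustration only (not part of the diff below): a minimal Go sketch of the
pattern this commit adopts. The enum's zero value serves as the internal
"default" that the parser produces when no SURVIVE clause is written; it is
never formatted back into SQL, and callers are expected to resolve it to a
concrete goal before anything is persisted. Only the constant names, the
formatted strings, and the panic-on-unknown behavior are taken from the patch;
the rest of the snippet is a simplified stand-in.

    package main

    import "fmt"

    // SurvivalGoal mirrors the shape of the new tree.SurvivalGoal type.
    type SurvivalGoal uint32

    const (
        // SurvivalGoalDefault is never rendered; it must be resolved to a
        // concrete goal before the database descriptor is persisted.
        SurvivalGoalDefault SurvivalGoal = iota
        SurvivalGoalRegionFailure
        SurvivalGoalZoneFailure
    )

    // String renders the goal the same way tree.SurvivalGoal.Format does and
    // refuses to render the default, which has no SQL spelling.
    func (g SurvivalGoal) String() string {
        switch g {
        case SurvivalGoalRegionFailure:
            return "SURVIVE REGION FAILURE"
        case SurvivalGoalZoneFailure:
            return "SURVIVE ZONE FAILURE"
        default:
            panic(fmt.Sprintf("survival goal %d has no SQL spelling", uint32(g)))
        }
    }

    func main() {
        fmt.Println(SurvivalGoalZoneFailure) // prints: SURVIVE ZONE FAILURE
    }
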
Release note: None --- docs/generated/sql/bnf/stmt_block.bnf | 1 - pkg/sql/alter_database.go | 6 ++-- pkg/sql/create_database.go | 2 +- pkg/sql/opaque.go | 6 ++-- pkg/sql/parser/parse_test.go | 4 --- pkg/sql/parser/sql.y | 24 +++++++--------- pkg/sql/sem/tree/BUILD.bazel | 2 +- pkg/sql/sem/tree/alter_database.go | 14 ++++----- pkg/sql/sem/tree/create.go | 6 ++-- pkg/sql/sem/tree/stmt.go | 8 +++--- pkg/sql/sem/tree/survival_goal.go | 41 +++++++++++++++++++++++++++ pkg/sql/sem/tree/survive.go | 38 ------------------------- 12 files changed, 73 insertions(+), 79 deletions(-) create mode 100644 pkg/sql/sem/tree/survival_goal.go delete mode 100644 pkg/sql/sem/tree/survive.go diff --git a/docs/generated/sql/bnf/stmt_block.bnf b/docs/generated/sql/bnf/stmt_block.bnf index 4406ab67a78e..8794e9252dea 100644 --- a/docs/generated/sql/bnf/stmt_block.bnf +++ b/docs/generated/sql/bnf/stmt_block.bnf @@ -2143,7 +2143,6 @@ region_name ::= survive_clause ::= 'SURVIVE' 'REGION' 'FAILURE' | 'SURVIVE' 'ZONE' 'FAILURE' - | 'SURVIVE' 'DEFAULT' role_option ::= 'CREATEROLE' diff --git a/pkg/sql/alter_database.go b/pkg/sql/alter_database.go index 23808f0ff22e..18d5f6571bed 100644 --- a/pkg/sql/alter_database.go +++ b/pkg/sql/alter_database.go @@ -112,9 +112,9 @@ func (p *planner) AlterDatabaseDropRegion( return nil, unimplemented.New("alter database drop region", "implementation pending") } -// AlterDatabaseSurvive transforms a tree.AlterDatabaseSurvive into a plan node. -func (p *planner) AlterDatabaseSurvive( - ctx context.Context, n *tree.AlterDatabaseSurvive, +// AlterDatabaseSurvivalGoal transforms a tree.AlterDatabaseSurvivalGoal into a plan node. +func (p *planner) AlterDatabaseSurvivalGoal( + ctx context.Context, n *tree.AlterDatabaseSurvivalGoal, ) (planNode, error) { return nil, unimplemented.New("alter database survive", "implementation pending") } diff --git a/pkg/sql/create_database.go b/pkg/sql/create_database.go index 46f10fc79ffd..a8df8edeeefa 100644 --- a/pkg/sql/create_database.go +++ b/pkg/sql/create_database.go @@ -77,7 +77,7 @@ func (p *planner) CreateDatabase(ctx context.Context, n *tree.CreateDatabase) (p ) } - if n.Survive != tree.SurviveDefault { + if n.SurvivalGoal != tree.SurvivalGoalDefault { return nil, unimplemented.New("create database survive", "implementation pending") } diff --git a/pkg/sql/opaque.go b/pkg/sql/opaque.go index 09a08fe54a02..c7559d17a5c9 100644 --- a/pkg/sql/opaque.go +++ b/pkg/sql/opaque.go @@ -55,8 +55,8 @@ func buildOpaque( plan, err = p.AlterDatabaseAddRegion(ctx, n) case *tree.AlterDatabaseDropRegion: plan, err = p.AlterDatabaseDropRegion(ctx, n) - case *tree.AlterDatabaseSurvive: - plan, err = p.AlterDatabaseSurvive(ctx, n) + case *tree.AlterDatabaseSurvivalGoal: + plan, err = p.AlterDatabaseSurvivalGoal(ctx, n) case *tree.AlterIndex: plan, err = p.AlterIndex(ctx, n) case *tree.AlterSchema: @@ -198,7 +198,7 @@ func init() { &tree.AlterDatabaseAddRegion{}, &tree.AlterDatabaseDropRegion{}, &tree.AlterDatabaseOwner{}, - &tree.AlterDatabaseSurvive{}, + &tree.AlterDatabaseSurvivalGoal{}, &tree.AlterIndex{}, &tree.AlterSchema{}, &tree.AlterTable{}, diff --git a/pkg/sql/parser/parse_test.go b/pkg/sql/parser/parse_test.go index 8ab86f0b990b..3b156838cf3f 100644 --- a/pkg/sql/parser/parse_test.go +++ b/pkg/sql/parser/parse_test.go @@ -1775,10 +1775,6 @@ func TestParse2(t *testing.T) { `CREATE DATABASE a REGION "us-west-1"`, `CREATE DATABASE a REGIONS = "us-west-1"`, }, - { - `CREATE DATABASE IF NOT EXISTS a SURVIVE DEFAULT`, - `CREATE DATABASE IF NOT EXISTS a`, - }, 
{`CREATE TABLE a (b INT) WITH (fillfactor=100)`, `CREATE TABLE a (b INT8)`}, {`CREATE TABLE a (b INT, UNIQUE INDEX foo (b))`, diff --git a/pkg/sql/parser/sql.y b/pkg/sql/parser/sql.y index 048e19d061d9..44a796d0539a 100644 --- a/pkg/sql/parser/sql.y +++ b/pkg/sql/parser/sql.y @@ -566,8 +566,8 @@ func (u *sqlSymUnion) refreshDataOption() tree.RefreshDataOption { func (u *sqlSymUnion) regionAffinity() tree.RegionalAffinity { return u.val.(tree.RegionalAffinity) } -func (u *sqlSymUnion) survive() tree.Survive { - return u.val.(tree.Survive) +func (u *sqlSymUnion) survivalGoal() tree.SurvivalGoal { + return u.val.(tree.SurvivalGoal) } func (u *sqlSymUnion) objectNamePrefix() tree.ObjectNamePrefix { return u.val.(tree.ObjectNamePrefix) @@ -957,7 +957,7 @@ func (u *sqlSymUnion) objectNamePrefixList() tree.ObjectNamePrefixList { %type opt_regions_list %type region_name %type region_name_list -%type survive_clause opt_survive_clause +%type survive_clause opt_survive_clause %type regional_affinity %type opt_connection_limit @@ -1499,9 +1499,9 @@ alter_database_drop_region_stmt: alter_database_survive_stmt: ALTER DATABASE database_name survive_clause { - $$.val = &tree.AlterDatabaseSurvive{ + $$.val = &tree.AlterDatabaseSurvivalGoal{ Name: tree.Name($3), - Survive: $4.survive(), + SurvivalGoal: $4.survivalGoal(), } } @@ -7554,7 +7554,7 @@ create_database_stmt: CType: $8, ConnectionLimit: $9.int32(), Regions: $10.nameList(), - Survive: $11.survive(), + SurvivalGoal: $11.survivalGoal(), } } | CREATE DATABASE IF NOT EXISTS database_name opt_with opt_template_clause opt_encoding_clause opt_lc_collate_clause opt_lc_ctype_clause opt_connection_limit opt_regions_list opt_survive_clause @@ -7568,7 +7568,7 @@ create_database_stmt: CType: $11, ConnectionLimit: $12.int32(), Regions: $13.nameList(), - Survive: $14.survive(), + SurvivalGoal: $14.survivalGoal(), } } | CREATE DATABASE error // SHOW HELP: CREATE DATABASE @@ -7590,22 +7590,18 @@ region_or_regions: survive_clause: SURVIVE REGION FAILURE { - $$.val = tree.SurviveRegionFailure + $$.val = tree.SurvivalGoalRegionFailure } | SURVIVE ZONE FAILURE { - $$.val = tree.SurviveZoneFailure - } -| SURVIVE DEFAULT - { - $$.val = tree.SurviveDefault + $$.val = tree.SurvivalGoalZoneFailure } opt_survive_clause: survive_clause | /* EMPTY */ { - $$.val = tree.SurviveDefault + $$.val = tree.SurvivalGoalDefault } opt_template_clause: diff --git a/pkg/sql/sem/tree/BUILD.bazel b/pkg/sql/sem/tree/BUILD.bazel index c8d9b334aa65..ac2c8f6f2612 100644 --- a/pkg/sql/sem/tree/BUILD.bazel +++ b/pkg/sql/sem/tree/BUILD.bazel @@ -76,7 +76,7 @@ go_library( "split.go", "statementtype_string.go", "stmt.go", - "survive.go", + "survival_goal.go", "table_name.go", "table_pattern.go", "table_ref.go", diff --git a/pkg/sql/sem/tree/alter_database.go b/pkg/sql/sem/tree/alter_database.go index 9188c68fcef0..76417e455da8 100644 --- a/pkg/sql/sem/tree/alter_database.go +++ b/pkg/sql/sem/tree/alter_database.go @@ -60,18 +60,18 @@ func (node *AlterDatabaseDropRegion) Format(ctx *FmtCtx) { ctx.FormatNode(&node.Region) } -// AlterDatabaseSurvive represents a ALTER DATABASE SURVIVE ... statement. -type AlterDatabaseSurvive struct { - Name Name - Survive Survive +// AlterDatabaseSurvivalGoal represents a ALTER DATABASE SURVIVE ... statement. +type AlterDatabaseSurvivalGoal struct { + Name Name + SurvivalGoal SurvivalGoal } -var _ Statement = &AlterDatabaseSurvive{} +var _ Statement = &AlterDatabaseSurvivalGoal{} // Format implements the NodeFormatter interface. 
-func (node *AlterDatabaseSurvive) Format(ctx *FmtCtx) { +func (node *AlterDatabaseSurvivalGoal) Format(ctx *FmtCtx) { ctx.WriteString("ALTER DATABASE ") ctx.FormatNode(&node.Name) ctx.WriteString(" ") - node.Survive.Format(ctx) + node.SurvivalGoal.Format(ctx) } diff --git a/pkg/sql/sem/tree/create.go b/pkg/sql/sem/tree/create.go index 9f0ef00cbc7c..c8a1248b1be0 100644 --- a/pkg/sql/sem/tree/create.go +++ b/pkg/sql/sem/tree/create.go @@ -45,7 +45,7 @@ type CreateDatabase struct { CType string ConnectionLimit int32 Regions NameList - Survive Survive + SurvivalGoal SurvivalGoal } // Format implements the NodeFormatter interface. @@ -79,9 +79,9 @@ func (node *CreateDatabase) Format(ctx *FmtCtx) { ctx.WriteString(" REGIONS = ") node.Regions.Format(ctx) } - if node.Survive != SurviveDefault { + if node.SurvivalGoal != SurvivalGoalDefault { ctx.WriteString(" ") - node.Survive.Format(ctx) + node.SurvivalGoal.Format(ctx) } } diff --git a/pkg/sql/sem/tree/stmt.go b/pkg/sql/sem/tree/stmt.go index 99a9b664433b..168f89b7c061 100644 --- a/pkg/sql/sem/tree/stmt.go +++ b/pkg/sql/sem/tree/stmt.go @@ -195,12 +195,12 @@ func (*AlterDatabaseDropRegion) StatementTag() string { return "ALTER DATABASE D func (*AlterDatabaseDropRegion) hiddenFromShowQueries() {} // StatementType implements the Statement interface. -func (*AlterDatabaseSurvive) StatementType() StatementType { return DDL } +func (*AlterDatabaseSurvivalGoal) StatementType() StatementType { return DDL } // StatementTag returns a short string identifying the type of statement. -func (*AlterDatabaseSurvive) StatementTag() string { return "ALTER DATABASE SURVIVE" } +func (*AlterDatabaseSurvivalGoal) StatementTag() string { return "ALTER DATABASE SURVIVE" } -func (*AlterDatabaseSurvive) hiddenFromShowQueries() {} +func (*AlterDatabaseSurvivalGoal) hiddenFromShowQueries() {} // StatementType implements the Statement interface. func (*AlterIndex) StatementType() StatementType { return DDL } @@ -1062,7 +1062,7 @@ func (n *AlterIndex) String() string { return AsString(n) } func (n *AlterDatabaseOwner) String() string { return AsString(n) } func (n *AlterDatabaseAddRegion) String() string { return AsString(n) } func (n *AlterDatabaseDropRegion) String() string { return AsString(n) } -func (n *AlterDatabaseSurvive) String() string { return AsString(n) } +func (n *AlterDatabaseSurvivalGoal) String() string { return AsString(n) } func (n *AlterSchema) String() string { return AsString(n) } func (n *AlterTable) String() string { return AsString(n) } func (n *AlterTableCmds) String() string { return AsString(n) } diff --git a/pkg/sql/sem/tree/survival_goal.go b/pkg/sql/sem/tree/survival_goal.go new file mode 100644 index 000000000000..29b90efcbdfc --- /dev/null +++ b/pkg/sql/sem/tree/survival_goal.go @@ -0,0 +1,41 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package tree + +import "github.com/cockroachdb/errors" + +// SurvivalGoal represents the desired survivability level +// for a given database. +type SurvivalGoal uint32 + +const ( + // SurvivalGoalDefault indicates default survive behavior. + // This will get translated to the appropriate value when persisted. 
+ SurvivalGoalDefault SurvivalGoal = iota + // SurvivalGoalRegionFailure indicates a database being able to withstand + // an entire region failure. + SurvivalGoalRegionFailure + // SurvivalGoalZoneFailure indicates a database being able to + // withstand a failure of an availibility zone. + SurvivalGoalZoneFailure +) + +// Format implements the NodeFormatter interface. +func (node *SurvivalGoal) Format(ctx *FmtCtx) { + switch *node { + case SurvivalGoalRegionFailure: + ctx.WriteString("SURVIVE REGION FAILURE") + case SurvivalGoalZoneFailure: + ctx.WriteString("SURVIVE ZONE FAILURE") + default: + panic(errors.Newf("unknown survival goal: %d", *node)) + } +} diff --git a/pkg/sql/sem/tree/survive.go b/pkg/sql/sem/tree/survive.go deleted file mode 100644 index 48a46ba5d005..000000000000 --- a/pkg/sql/sem/tree/survive.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package tree - -// Survive represents the desired survivability level -// for a given database. -type Survive uint32 - -const ( - // SurviveDefault indicates default survive behavior. - SurviveDefault Survive = iota - // SurviveRegionFailure indicates a database being able to withstand - // an entire region failure. - SurviveRegionFailure - // SurviveyZoneFailure indicates a database being able to - // withstand a failure of an availibility zone. - SurviveZoneFailure -) - -// Format implements the NodeFormatter interface. -func (node *Survive) Format(ctx *FmtCtx) { - switch *node { - case SurviveRegionFailure: - ctx.WriteString("SURVIVE REGION FAILURE") - case SurviveZoneFailure: - ctx.WriteString("SURVIVE ZONE FAILURE") - case SurviveDefault: - ctx.WriteString("SURVIVE DEFAULT") - } -} From 58a085f82fe9692089183818230f674f8a7f6700 Mon Sep 17 00:00:00 2001 From: Paul Bardea Date: Wed, 18 Nov 2020 11:07:12 -0500 Subject: [PATCH 4/6] backupccl: skip TestRestoreMidSchemaChange under race This test is intended to verify that the particular set of backups taken of descriptors during a schema change. Not much is gained running these under race, but they are very slow so they should be skipped. 
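In miniature, the whole change is the pattern below (a sketch: the package
clause is assumed, the surrounding setup comes from the file's existing
context, and only the skip call plus its import are actually new in the diff):

    package backupccl

    import (
        "testing"

        "github.com/cockroachdb/cockroach/pkg/testutils/skip"
        "github.com/cockroachdb/cockroach/pkg/util/leaktest"
        "github.com/cockroachdb/cockroach/pkg/util/log"
    )

    func TestRestoreMidSchemaChange(t *testing.T) {
        defer leaktest.AfterTest(t)()
        defer log.Scope(t).Close(t)

        // The restores exercised here are slow and the race detector adds
        // little signal, so skip under race and point at the tracking issue.
        skip.UnderRaceWithIssue(t, 56584)

        // ... remainder of the test is unchanged ...
    }
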
Release note: None --- pkg/ccl/backupccl/restore_mid_schema_change_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/ccl/backupccl/restore_mid_schema_change_test.go b/pkg/ccl/backupccl/restore_mid_schema_change_test.go index 5587fabb8f12..fef219cd6202 100644 --- a/pkg/ccl/backupccl/restore_mid_schema_change_test.go +++ b/pkg/ccl/backupccl/restore_mid_schema_change_test.go @@ -20,6 +20,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/jobs" "github.com/cockroachdb/cockroach/pkg/testutils" + "github.com/cockroachdb/cockroach/pkg/testutils/skip" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" "github.com/cockroachdb/cockroach/pkg/util/leaktest" @@ -52,6 +53,9 @@ import ( func TestRestoreMidSchemaChange(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) + + skip.UnderRaceWithIssue(t, 56584) + const ( testdataBase = "testdata/restore_mid_schema_change" exportDirs = testdataBase + "/exports" From 446430af8ebb90242b7ce3eb148328a52a624ef4 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Mon, 16 Nov 2020 13:45:56 -1000 Subject: [PATCH 5/6] colexec: clean up the hash joiner a bit When we have unmatched tuples from the left in case of left/full outer hash joins, we still copy the whole vectors according to `buildIdx` selection vector from the right side and then set nulls only for the tuples that didn't have a match. Such approach is a lot more performant then if we were to check for each tuple whether it had a match and copy only the matched ones. This works ok since we assume that `buildIdx` value for unmatched tuples is left as 0 which we didn't explicitly do in distinctCollect variation. Note that it still worked correctly in all cases (both in in-memory hash joiner and external hash joiner) because we're zeroing out the slice when resetting (which is important for the external hash joiner). Given the observation that we're now always setting the correct values in `buildIdx`, `probeIdx`, `probeRowUnmatched`, this commit additionally removes zeroing out of those slices during resetting. Release note: None --- pkg/sql/colexec/hashjoiner.eg.go | 50 +++++++++++++++++++----------- pkg/sql/colexec/hashjoiner.go | 10 +++--- pkg/sql/colexec/hashjoiner_tmpl.go | 25 +++++++++------ 3 files changed, 52 insertions(+), 33 deletions(-) diff --git a/pkg/sql/colexec/hashjoiner.eg.go b/pkg/sql/colexec/hashjoiner.eg.go index 14597d7e7ebf..6794993bad77 100644 --- a/pkg/sql/colexec/hashjoiner.eg.go +++ b/pkg/sql/colexec/hashjoiner.eg.go @@ -30,15 +30,16 @@ func collectProbeOuter_false( return nResults } - hj.probeState.probeRowUnmatched[nResults] = currentID == 0 - if currentID > 0 { - hj.probeState.buildIdx[nResults] = int(currentID - 1) - } else { - // If currentID == 0, then probeRowUnmatched will have been set - and - // we set the corresponding buildIdx to zero so that (as long as the - // build hash table has at least one row) we can copy the values vector - // without paying attention to probeRowUnmatched. + rowUnmatched := currentID == 0 + hj.probeState.probeRowUnmatched[nResults] = rowUnmatched + if rowUnmatched { + // The row is unmatched, and we set the corresponding buildIdx + // to zero so that (as long as the build hash table has at least + // one row) we can copy the values vector without paying + // attention to probeRowUnmatched. 
hj.probeState.buildIdx[nResults] = 0 + } else { + hj.probeState.buildIdx[nResults] = int(currentID - 1) } { var __retval_0 int @@ -76,15 +77,16 @@ func collectProbeOuter_true( return nResults } - hj.probeState.probeRowUnmatched[nResults] = currentID == 0 - if currentID > 0 { - hj.probeState.buildIdx[nResults] = int(currentID - 1) - } else { - // If currentID == 0, then probeRowUnmatched will have been set - and - // we set the corresponding buildIdx to zero so that (as long as the - // build hash table has at least one row) we can copy the values vector - // without paying attention to probeRowUnmatched. + rowUnmatched := currentID == 0 + hj.probeState.probeRowUnmatched[nResults] = rowUnmatched + if rowUnmatched { + // The row is unmatched, and we set the corresponding buildIdx + // to zero so that (as long as the build hash table has at least + // one row) we can copy the values vector without paying + // attention to probeRowUnmatched. hj.probeState.buildIdx[nResults] = 0 + } else { + hj.probeState.buildIdx[nResults] = int(currentID - 1) } { var __retval_0 int @@ -255,7 +257,13 @@ func distinctCollectProbeOuter_false(hj *hashJoiner, batchSize int, sel []int) { id := hj.ht.probeScratch.groupID[i] rowUnmatched := id == 0 hj.probeState.probeRowUnmatched[i] = rowUnmatched - if !rowUnmatched { + if rowUnmatched { + // The row is unmatched, and we set the corresponding buildIdx + // to zero so that (as long as the build hash table has at least + // one row) we can copy the values vector without paying + // attention to probeRowUnmatched. + hj.probeState.buildIdx[i] = 0 + } else { hj.probeState.buildIdx[i] = int(id - 1) } { @@ -282,7 +290,13 @@ func distinctCollectProbeOuter_true(hj *hashJoiner, batchSize int, sel []int) { id := hj.ht.probeScratch.groupID[i] rowUnmatched := id == 0 hj.probeState.probeRowUnmatched[i] = rowUnmatched - if !rowUnmatched { + if rowUnmatched { + // The row is unmatched, and we set the corresponding buildIdx + // to zero so that (as long as the build hash table has at least + // one row) we can copy the values vector without paying + // attention to probeRowUnmatched. + hj.probeState.buildIdx[i] = 0 + } else { hj.probeState.buildIdx[i] = int(id - 1) } { diff --git a/pkg/sql/colexec/hashjoiner.go b/pkg/sql/colexec/hashjoiner.go index 1a99fc92345b..e7b15a3ee03c 100644 --- a/pkg/sql/colexec/hashjoiner.go +++ b/pkg/sql/colexec/hashjoiner.go @@ -316,7 +316,7 @@ func (hj *hashJoiner) build(ctx context.Context) { // same and visited slices for the prober. if !hj.spec.rightDistinct && hj.spec.joinType != descpb.LeftAntiJoin && hj.spec.joinType != descpb.ExceptAllJoin { // We don't need same with LEFT ANTI and EXCEPT ALL joins because - // they have separate collectAnti* methods. + // they have separate collectLeftAnti method. hj.ht.same = maybeAllocateUint64Array(hj.ht.same, hj.ht.vals.Length()+1) } if !hj.spec.rightDistinct || hj.spec.joinType.IsSetOpJoin() { @@ -632,11 +632,9 @@ func (hj *hashJoiner) reset(ctx context.Context) { } hj.state = hjBuilding hj.ht.reset(ctx) - copy(hj.probeState.buildIdx[:coldata.BatchSize()], zeroIntColumn) - copy(hj.probeState.probeIdx[:coldata.BatchSize()], zeroIntColumn) - if hj.spec.left.outer { - copy(hj.probeState.probeRowUnmatched[:coldata.BatchSize()], zeroBoolColumn) - } + // Note that we don't zero out hj.probeState.buildIdx, + // hj.probeState.probeIdx, and hj.probeState.probeRowUnmatched because the + // values in these slices are always set in collecting methods. 
// hj.probeState.buildRowMatched is reset after building the hash table is // complete in build() method. hj.emittingRightState.rowIdx = 0 diff --git a/pkg/sql/colexec/hashjoiner_tmpl.go b/pkg/sql/colexec/hashjoiner_tmpl.go index 65808c60173d..5dd92f1ba4ec 100644 --- a/pkg/sql/colexec/hashjoiner_tmpl.go +++ b/pkg/sql/colexec/hashjoiner_tmpl.go @@ -36,15 +36,16 @@ func collectProbeOuter( return nResults } - hj.probeState.probeRowUnmatched[nResults] = currentID == 0 - if currentID > 0 { - hj.probeState.buildIdx[nResults] = int(currentID - 1) - } else { - // If currentID == 0, then probeRowUnmatched will have been set - and - // we set the corresponding buildIdx to zero so that (as long as the - // build hash table has at least one row) we can copy the values vector - // without paying attention to probeRowUnmatched. + rowUnmatched := currentID == 0 + hj.probeState.probeRowUnmatched[nResults] = rowUnmatched + if rowUnmatched { + // The row is unmatched, and we set the corresponding buildIdx + // to zero so that (as long as the build hash table has at least + // one row) we can copy the values vector without paying + // attention to probeRowUnmatched. hj.probeState.buildIdx[nResults] = 0 + } else { + hj.probeState.buildIdx[nResults] = int(currentID - 1) } hj.probeState.probeIdx[nResults] = getIdx(i, sel, useSel) currentID = hj.ht.same[currentID] @@ -142,7 +143,13 @@ func distinctCollectProbeOuter(hj *hashJoiner, batchSize int, sel []int, useSel id := hj.ht.probeScratch.groupID[i] rowUnmatched := id == 0 hj.probeState.probeRowUnmatched[i] = rowUnmatched - if !rowUnmatched { + if rowUnmatched { + // The row is unmatched, and we set the corresponding buildIdx + // to zero so that (as long as the build hash table has at least + // one row) we can copy the values vector without paying + // attention to probeRowUnmatched. + hj.probeState.buildIdx[i] = 0 + } else { hj.probeState.buildIdx[i] = int(id - 1) } hj.probeState.probeIdx[i] = getIdx(i, sel, useSel) From d9e718e3dd04d2af24deab0087ded7d3bfca1272 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Mon, 16 Nov 2020 15:21:19 -1000 Subject: [PATCH 6/6] colexec: make hash joiner more dynamic This commit refactors the hash joiner slightly in order to avoid allocating slices with big fixed size which makes the algorithm more dynamic. Release note: None --- pkg/sql/catalog/descpb/join_type.go | 18 ++++ pkg/sql/colexec/colbuilder/execplan.go | 5 +- pkg/sql/colexec/hashjoiner.eg.go | 41 ++++----- pkg/sql/colexec/hashjoiner.go | 110 ++++++++++++++++--------- pkg/sql/colexec/hashjoiner_test.go | 3 +- pkg/sql/colexec/hashjoiner_tmpl.go | 37 ++++----- 6 files changed, 124 insertions(+), 90 deletions(-) diff --git a/pkg/sql/catalog/descpb/join_type.go b/pkg/sql/catalog/descpb/join_type.go index d58dc941f2f5..128f6000944d 100644 --- a/pkg/sql/catalog/descpb/join_type.go +++ b/pkg/sql/catalog/descpb/join_type.go @@ -89,3 +89,21 @@ func (j JoinType) IsEmptyOutputWhenRightIsEmpty() bool { return false } } + +// IsLeftOuterOrFullOuter returns whether j is either LEFT OUTER or FULL OUTER +// join type. +func (j JoinType) IsLeftOuterOrFullOuter() bool { + return j == LeftOuterJoin || j == FullOuterJoin +} + +// IsLeftAntiOrExceptAll returns whether j is either LEFT ANTI or EXCEPT ALL +// join type. +func (j JoinType) IsLeftAntiOrExceptAll() bool { + return j == LeftAntiJoin || j == ExceptAllJoin +} + +// IsRightSemiOrRightAnti returns whether j is either RIGHT SEMI or RIGHT ANTI +// join type. 
+func (j JoinType) IsRightSemiOrRightAnti() bool { + return j == RightSemiJoin || j == RightAntiJoin +} diff --git a/pkg/sql/colexec/colbuilder/execplan.go b/pkg/sql/colexec/colbuilder/execplan.go index eb4f8c2e3a75..5e26d9229809 100644 --- a/pkg/sql/colexec/colbuilder/execplan.go +++ b/pkg/sql/colexec/colbuilder/execplan.go @@ -860,7 +860,7 @@ func NewColOperator( // joiner, in order to handle NULL values correctly, needs to think // that an empty set of equality columns doesn't form a key. rightEqColsAreKey := core.HashJoiner.RightEqColumnsAreKey && len(core.HashJoiner.RightEqColumns) > 0 - hjSpec, err := colexec.MakeHashJoinerSpec( + hjSpec := colexec.MakeHashJoinerSpec( core.HashJoiner.Type, core.HashJoiner.LeftEqColumns, core.HashJoiner.RightEqColumns, @@ -868,9 +868,6 @@ func NewColOperator( rightTypes, rightEqColsAreKey, ) - if err != nil { - return r, err - } inMemoryHashJoiner := colexec.NewHashJoiner( colmem.NewAllocator(ctx, hashJoinerMemAccount, factory), diff --git a/pkg/sql/colexec/hashjoiner.eg.go b/pkg/sql/colexec/hashjoiner.eg.go index 6794993bad77..f7620d59452e 100644 --- a/pkg/sql/colexec/hashjoiner.eg.go +++ b/pkg/sql/colexec/hashjoiner.eg.go @@ -9,10 +9,7 @@ package colexec -import ( - "github.com/cockroachdb/cockroach/pkg/col/coldata" - "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" -) +import "github.com/cockroachdb/cockroach/pkg/col/coldata" const _ = "template_collectProbeOuter" @@ -24,7 +21,7 @@ func collectProbeOuter_false( currentID := hj.ht.probeScratch.headID[i] for { - if nResults >= coldata.BatchSize() { + if nResults == len(hj.probeState.buildIdx) { hj.probeState.prevBatch = batch hj.probeState.prevBatchResumeIdx = i return nResults @@ -71,7 +68,7 @@ func collectProbeOuter_true( currentID := hj.ht.probeScratch.headID[i] for { - if nResults >= coldata.BatchSize() { + if nResults == len(hj.probeState.buildIdx) { hj.probeState.prevBatch = batch hj.probeState.prevBatchResumeIdx = i return nResults @@ -118,7 +115,7 @@ func collectProbeNoOuter_false( for i := hj.probeState.prevBatchResumeIdx; i < batchSize; i++ { currentID := hj.ht.probeScratch.headID[i] for currentID != 0 { - if nResults >= coldata.BatchSize() { + if nResults == len(hj.probeState.buildIdx) { hj.probeState.prevBatch = batch hj.probeState.prevBatchResumeIdx = i return nResults @@ -150,7 +147,7 @@ func collectProbeNoOuter_true( for i := hj.probeState.prevBatchResumeIdx; i < batchSize; i++ { currentID := hj.ht.probeScratch.headID[i] for currentID != 0 { - if nResults >= coldata.BatchSize() { + if nResults == len(hj.probeState.buildIdx) { hj.probeState.prevBatch = batch hj.probeState.prevBatchResumeIdx = i return nResults @@ -370,12 +367,12 @@ func distinctCollectProbeNoOuter_true( func (hj *hashJoiner) collect(batch coldata.Batch, batchSize int, sel []int) int { nResults := int(0) - if hj.spec.joinType == descpb.RightSemiJoin || hj.spec.joinType == descpb.RightAntiJoin { + if hj.spec.joinType.IsRightSemiOrRightAnti() { collectRightSemiAnti(hj, batchSize) return 0 } - if hj.spec.left.outer { + if hj.spec.joinType.IsLeftOuterOrFullOuter() { if sel != nil { nResults = collectProbeOuter_true(hj, batchSize, nResults, batch, sel) } else { @@ -383,17 +380,15 @@ func (hj *hashJoiner) collect(batch coldata.Batch, batchSize int, sel []int) int } } else { if sel != nil { - switch hj.spec.joinType { - case descpb.LeftAntiJoin, descpb.ExceptAllJoin: + if hj.spec.joinType.IsLeftAntiOrExceptAll() { nResults = collectLeftAnti_true(hj, batchSize, nResults, batch, sel) - default: + } else { nResults = 
collectProbeNoOuter_true(hj, batchSize, nResults, batch, sel) } } else { - switch hj.spec.joinType { - case descpb.LeftAntiJoin, descpb.ExceptAllJoin: + if hj.spec.joinType.IsLeftAntiOrExceptAll() { nResults = collectLeftAnti_false(hj, batchSize, nResults, batch, sel) - default: + } else { nResults = collectProbeNoOuter_false(hj, batchSize, nResults, batch, sel) } } @@ -408,12 +403,12 @@ func (hj *hashJoiner) collect(batch coldata.Batch, batchSize int, sel []int) int func (hj *hashJoiner) distinctCollect(batch coldata.Batch, batchSize int, sel []int) int { nResults := int(0) - if hj.spec.joinType == descpb.RightSemiJoin || hj.spec.joinType == descpb.RightAntiJoin { + if hj.spec.joinType.IsRightSemiOrRightAnti() { collectRightSemiAnti(hj, batchSize) return 0 } - if hj.spec.left.outer { + if hj.spec.joinType.IsLeftOuterOrFullOuter() { nResults = batchSize if sel != nil { @@ -423,23 +418,21 @@ func (hj *hashJoiner) distinctCollect(batch coldata.Batch, batchSize int, sel [] } } else { if sel != nil { - switch hj.spec.joinType { - case descpb.LeftAntiJoin, descpb.ExceptAllJoin: + if hj.spec.joinType.IsLeftAntiOrExceptAll() { // For LEFT ANTI and EXCEPT ALL joins we don't care whether the build // (right) side was distinct, so we only have single variation of COLLECT // method. nResults = collectLeftAnti_true(hj, batchSize, nResults, batch, sel) - default: + } else { nResults = distinctCollectProbeNoOuter_true(hj, batchSize, nResults, sel) } } else { - switch hj.spec.joinType { - case descpb.LeftAntiJoin, descpb.ExceptAllJoin: + if hj.spec.joinType.IsLeftAntiOrExceptAll() { // For LEFT ANTI and EXCEPT ALL joins we don't care whether the build // (right) side was distinct, so we only have single variation of COLLECT // method. nResults = collectLeftAnti_false(hj, batchSize, nResults, batch, sel) - default: + } else { nResults = distinctCollectProbeNoOuter_false(hj, batchSize, nResults, sel) } } diff --git a/pkg/sql/colexec/hashjoiner.go b/pkg/sql/colexec/hashjoiner.go index e7b15a3ee03c..acf27f635746 100644 --- a/pkg/sql/colexec/hashjoiner.go +++ b/pkg/sql/colexec/hashjoiner.go @@ -77,9 +77,6 @@ type hashJoinerSourceSpec struct { // sourceTypes specify the types of the input columns of the source table for // the hash joiner. sourceTypes []*types.T - - // outer specifies whether an outer join is required over the input. - outer bool } // hashJoiner performs a hash join on the input tables equality columns. @@ -198,11 +195,10 @@ type hashJoiner struct { buildIdx []int probeIdx []int - // probeRowUnmatched is used in the case that the spec.left.outer is true. - // This means that an outer join is performed on the probe side and we use - // probeRowUnmatched to represent that the resulting columns should be NULL on - // the build table. This indicates that the probe table row did not match any - // build table rows. + // probeRowUnmatched is used in the case of left/full outer joins. We + // use probeRowUnmatched to represent that the resulting columns should + // be NULL on the build table. This indicates that the probe table row + // did not match any build table rows. probeRowUnmatched []bool // buildRowMatched is used in the case that spec.trackBuildMatches is true. This // means that an outer join is performed on the build side and buildRowMatched @@ -314,7 +310,7 @@ func (hj *hashJoiner) build(ctx context.Context) { // We might have duplicates in the hash table, so we need to set up // same and visited slices for the prober. 
- if !hj.spec.rightDistinct && hj.spec.joinType != descpb.LeftAntiJoin && hj.spec.joinType != descpb.ExceptAllJoin { + if !hj.spec.rightDistinct && !hj.spec.joinType.IsLeftAntiOrExceptAll() { // We don't need same with LEFT ANTI and EXCEPT ALL joins because // they have separate collectLeftAnti method. hj.ht.same = maybeAllocateUint64Array(hj.ht.same, hj.ht.vals.Length()+1) @@ -345,6 +341,19 @@ func (hj *hashJoiner) build(ctx context.Context) { // didn't get a match when matched==false (right/full outer and right anti // joins) or did get a match when matched==true (right semi joins). func (hj *hashJoiner) emitRight(matched bool) { + // Make sure that hj.probeState.buildIdx is of sufficient size (it is used + // as a selection vector to select only the necessary tuples). + buildIdxSize := hj.ht.vals.Length() - hj.emittingRightState.rowIdx + if buildIdxSize > coldata.BatchSize() { + buildIdxSize = coldata.BatchSize() + } + if cap(hj.probeState.buildIdx) < buildIdxSize { + hj.probeState.buildIdx = make([]int, buildIdxSize) + } else { + hj.probeState.buildIdx = hj.probeState.buildIdx[:buildIdxSize] + } + + // Find the next batch of tuples that have the requested 'matched' value. nResults := 0 for nResults < coldata.BatchSize() && hj.emittingRightState.rowIdx < hj.ht.vals.Length() { if hj.probeState.buildRowMatched[hj.emittingRightState.rowIdx] == matched { @@ -390,6 +399,45 @@ func (hj *hashJoiner) emitRight(matched bool) { }) } +// prepareForCollecting sets up the hash joiner for collecting by making sure +// that various slices in hj.probeState are of sufficient length depending on +// the join type. Note that batchSize might cap the number of tuples collected +// in a single output batch (this is the case with non-distinct collectProbe* +// methods). +func (hj *hashJoiner) prepareForCollecting(batchSize int) { + if hj.spec.joinType.IsRightSemiOrRightAnti() { + // Right semi/anti joins have a separate collecting method that simply + // records the fact whether build rows had a match and don't need these + // probing slices. + return + } + // Note that we don't need to zero out the slices if they have enough + // capacity because the correct values will always be set in the collecting + // methods. + if cap(hj.probeState.probeIdx) < batchSize { + hj.probeState.probeIdx = make([]int, batchSize) + } else { + hj.probeState.probeIdx = hj.probeState.probeIdx[:batchSize] + } + if hj.spec.joinType.IsLeftAntiOrExceptAll() { + // Left anti and except all joins have special collectLeftAnti method + // that only uses probeIdx slice. + return + } + if hj.spec.joinType.IsLeftOuterOrFullOuter() { + if cap(hj.probeState.probeRowUnmatched) < batchSize { + hj.probeState.probeRowUnmatched = make([]bool, batchSize) + } else { + hj.probeState.probeRowUnmatched = hj.probeState.probeRowUnmatched[:batchSize] + } + } + if cap(hj.probeState.buildIdx) < batchSize { + hj.probeState.buildIdx = make([]int, batchSize) + } else { + hj.probeState.buildIdx = hj.probeState.buildIdx[:batchSize] + } +} + // exec is a general prober that works with non-distinct build table equality // columns. It returns a Batch with N + M columns where N is the number of // left source columns and M is the number of right source columns. 
The first N @@ -398,12 +446,17 @@ func (hj *hashJoiner) emitRight(matched bool) { func (hj *hashJoiner) exec(ctx context.Context) coldata.Batch { if batch := hj.probeState.prevBatch; batch != nil { // The previous result was bigger than the maximum batch size, so we didn't - // finish outputting it in the last call to probe. Continue outputting the + // finish outputting it in the last call to exec. Continue outputting the // result from the previous batch. hj.probeState.prevBatch = nil batchSize := batch.Length() sel := batch.Selection() + // Since we're probing the same batch for the second time, it is likely + // that every probe tuple has multiple matches, so we want to maximize + // the number of tuples we collect in a single output batch, and, + // therefore, we use coldata.BatchSize() here. + hj.prepareForCollecting(coldata.BatchSize()) nResults := hj.collect(batch, batchSize, sel) hj.congregate(nResults, batch) } else { @@ -433,6 +486,7 @@ func (hj *hashJoiner) exec(ctx context.Context) coldata.Batch { hj.ht.computeBuckets( ctx, hj.probeState.buckets, hj.ht.keys, batchSize, sel, ) + // Then, we initialize groupID with the initial hash buckets and // toCheck with all applicable indices. hj.ht.probeScratch.setupLimitedSlices(batchSize, hj.ht.buildMode) @@ -460,6 +514,9 @@ func (hj *hashJoiner) exec(ctx context.Context) coldata.Batch { nToCheck = uint64(batchSize) } + // Now we collect all matches that we can emit in the probing phase + // in a single batch. + hj.prepareForCollecting(batchSize) var nResults int if hj.spec.rightDistinct { for nToCheck > 0 { @@ -545,7 +602,7 @@ func (hj *hashJoiner) congregate(nResults int, batch coldata.Batch) { ) } } - if hj.spec.left.outer { + if hj.spec.joinType.IsLeftOuterOrFullOuter() { // Add in the nulls we needed to set for the outer join. for i := range hj.spec.right.sourceTypes { outCol := hj.output.ColVec(i + rightColOffset) @@ -560,7 +617,7 @@ func (hj *hashJoiner) congregate(nResults int, batch coldata.Batch) { } if hj.spec.trackBuildMatches { - if hj.spec.left.outer { + if hj.spec.joinType.IsLeftOuterOrFullOuter() { for i := 0; i < nResults; i++ { if !hj.probeState.probeRowUnmatched[i] { hj.probeState.buildRowMatched[hj.probeState.buildIdx[i]] = true @@ -653,20 +710,8 @@ func MakeHashJoinerSpec( leftTypes []*types.T, rightTypes []*types.T, rightDistinct bool, -) (HashJoinerSpec, error) { - var ( - spec HashJoinerSpec - leftOuter, rightOuter bool - ) +) HashJoinerSpec { switch joinType { - case descpb.InnerJoin: - case descpb.RightOuterJoin: - rightOuter = true - case descpb.LeftOuterJoin: - leftOuter = true - case descpb.FullOuterJoin: - rightOuter = true - leftOuter = true case descpb.LeftSemiJoin: // In a left semi join, we don't need to store anything but a single row per // build row, since all we care about is whether a row on the left matches @@ -691,8 +736,6 @@ func MakeHashJoinerSpec( // TODO(yuzefovich): refactor these joins to take advantage of the // actual distinctness information. 
rightDistinct = false - default: - return spec, errors.AssertionFailedf("hash join of type %s not supported", joinType) } var trackBuildMatches bool switch joinType { @@ -704,21 +747,18 @@ func MakeHashJoinerSpec( left := hashJoinerSourceSpec{ eqCols: leftEqCols, sourceTypes: leftTypes, - outer: leftOuter, } right := hashJoinerSourceSpec{ eqCols: rightEqCols, sourceTypes: rightTypes, - outer: rightOuter, } - spec = HashJoinerSpec{ + return HashJoinerSpec{ joinType: joinType, left: left, right: right, trackBuildMatches: trackBuildMatches, rightDistinct: rightDistinct, } - return spec, nil } // NewHashJoiner creates a new equality hash join operator on the left and @@ -738,17 +778,11 @@ func NewHashJoiner( if spec.joinType.ShouldIncludeRightColsInOutput() { outputTypes = append(outputTypes, spec.right.sourceTypes...) } - hj := &hashJoiner{ + return &hashJoiner{ twoInputNode: newTwoInputNode(leftSource, rightSource), buildSideAllocator: buildSideAllocator, outputUnlimitedAllocator: outputUnlimitedAllocator, spec: spec, outputTypes: outputTypes, } - hj.probeState.buildIdx = make([]int, coldata.BatchSize()) - hj.probeState.probeIdx = make([]int, coldata.BatchSize()) - if spec.left.outer { - hj.probeState.probeRowUnmatched = make([]bool, coldata.BatchSize()) - } - return hj } diff --git a/pkg/sql/colexec/hashjoiner_test.go b/pkg/sql/colexec/hashjoiner_test.go index 284fc2a37052..755122bf5d44 100644 --- a/pkg/sql/colexec/hashjoiner_test.go +++ b/pkg/sql/colexec/hashjoiner_test.go @@ -1082,13 +1082,12 @@ func BenchmarkHashJoiner(b *testing.B) { if fullOuter { joinType = descpb.FullOuterJoin } - hjSpec, err := MakeHashJoinerSpec( + hjSpec := MakeHashJoinerSpec( joinType, []uint32{0, 1}, []uint32{2, 3}, sourceTypes, sourceTypes, rightDistinct, ) - require.NoError(b, err) hj := NewHashJoiner( testAllocator, testAllocator, hjSpec, leftSource, rightSource, diff --git a/pkg/sql/colexec/hashjoiner_tmpl.go b/pkg/sql/colexec/hashjoiner_tmpl.go index 5dd92f1ba4ec..cf21c2c73c21 100644 --- a/pkg/sql/colexec/hashjoiner_tmpl.go +++ b/pkg/sql/colexec/hashjoiner_tmpl.go @@ -12,10 +12,7 @@ package colexec -import ( - "github.com/cockroachdb/cockroach/pkg/col/coldata" - "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" -) +import "github.com/cockroachdb/cockroach/pkg/col/coldata" // execgen:template func collectProbeOuter( @@ -30,7 +27,7 @@ func collectProbeOuter( currentID := hj.ht.probeScratch.headID[i] for { - if nResults >= coldata.BatchSize() { + if nResults == len(hj.probeState.buildIdx) { hj.probeState.prevBatch = batch hj.probeState.prevBatchResumeIdx = i return nResults @@ -72,7 +69,7 @@ func collectProbeNoOuter( for i := hj.probeState.prevBatchResumeIdx; i < batchSize; i++ { currentID := hj.ht.probeScratch.headID[i] for currentID != 0 { - if nResults >= coldata.BatchSize() { + if nResults == len(hj.probeState.buildIdx) { hj.probeState.prevBatch = batch hj.probeState.prevBatchResumeIdx = i return nResults @@ -184,12 +181,12 @@ func distinctCollectProbeNoOuter( func (hj *hashJoiner) collect(batch coldata.Batch, batchSize int, sel []int) int { nResults := int(0) - if hj.spec.joinType == descpb.RightSemiJoin || hj.spec.joinType == descpb.RightAntiJoin { + if hj.spec.joinType.IsRightSemiOrRightAnti() { collectRightSemiAnti(hj, batchSize) return 0 } - if hj.spec.left.outer { + if hj.spec.joinType.IsLeftOuterOrFullOuter() { if sel != nil { nResults = collectProbeOuter(hj, batchSize, nResults, batch, sel, true) } else { @@ -197,17 +194,15 @@ func (hj *hashJoiner) collect(batch coldata.Batch, batchSize 
int, sel []int) int } } else { if sel != nil { - switch hj.spec.joinType { - case descpb.LeftAntiJoin, descpb.ExceptAllJoin: + if hj.spec.joinType.IsLeftAntiOrExceptAll() { nResults = collectLeftAnti(hj, batchSize, nResults, batch, sel, true) - default: + } else { nResults = collectProbeNoOuter(hj, batchSize, nResults, batch, sel, true) } } else { - switch hj.spec.joinType { - case descpb.LeftAntiJoin, descpb.ExceptAllJoin: + if hj.spec.joinType.IsLeftAntiOrExceptAll() { nResults = collectLeftAnti(hj, batchSize, nResults, batch, sel, false) - default: + } else { nResults = collectProbeNoOuter(hj, batchSize, nResults, batch, sel, false) } } @@ -222,12 +217,12 @@ func (hj *hashJoiner) collect(batch coldata.Batch, batchSize int, sel []int) int func (hj *hashJoiner) distinctCollect(batch coldata.Batch, batchSize int, sel []int) int { nResults := int(0) - if hj.spec.joinType == descpb.RightSemiJoin || hj.spec.joinType == descpb.RightAntiJoin { + if hj.spec.joinType.IsRightSemiOrRightAnti() { collectRightSemiAnti(hj, batchSize) return 0 } - if hj.spec.left.outer { + if hj.spec.joinType.IsLeftOuterOrFullOuter() { nResults = batchSize if sel != nil { @@ -237,23 +232,21 @@ func (hj *hashJoiner) distinctCollect(batch coldata.Batch, batchSize int, sel [] } } else { if sel != nil { - switch hj.spec.joinType { - case descpb.LeftAntiJoin, descpb.ExceptAllJoin: + if hj.spec.joinType.IsLeftAntiOrExceptAll() { // For LEFT ANTI and EXCEPT ALL joins we don't care whether the build // (right) side was distinct, so we only have single variation of COLLECT // method. nResults = collectLeftAnti(hj, batchSize, nResults, batch, sel, true) - default: + } else { nResults = distinctCollectProbeNoOuter(hj, batchSize, nResults, sel, true) } } else { - switch hj.spec.joinType { - case descpb.LeftAntiJoin, descpb.ExceptAllJoin: + if hj.spec.joinType.IsLeftAntiOrExceptAll() { // For LEFT ANTI and EXCEPT ALL joins we don't care whether the build // (right) side was distinct, so we only have single variation of COLLECT // method. nResults = collectLeftAnti(hj, batchSize, nResults, batch, sel, false) - default: + } else { nResults = distinctCollectProbeNoOuter(hj, batchSize, nResults, sel, false) } }
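
Editorial addendum (not part of any patch above): the last commit's
prepareForCollecting and emitRight both lean on the same grow-or-reslice
idiom. A slice's backing array is reused when its capacity suffices and a new
one is allocated only when the requested length has grown; nothing is zeroed,
because the collecting methods overwrite every element they later read. A
self-contained sketch of that idiom (the helper name is ours, not
CockroachDB's):

    package main

    import "fmt"

    // ensureLen returns a slice of length n, reusing s's backing array when
    // possible. Elements are not zeroed; callers must overwrite what they use,
    // just as the hash joiner overwrites buildIdx, probeIdx and
    // probeRowUnmatched in its collecting methods.
    func ensureLen(s []int, n int) []int {
        if cap(s) < n {
            return make([]int, n)
        }
        return s[:n]
    }

    func main() {
        var buildIdx []int
        buildIdx = ensureLen(buildIdx, 4)         // allocates
        fmt.Println(len(buildIdx), cap(buildIdx)) // 4 4
        buildIdx = ensureLen(buildIdx, 2)         // reslices, no allocation
        fmt.Println(len(buildIdx), cap(buildIdx)) // 2 4
    }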