From a7958705afcc17e853664a16b87467dfbadf74c8 Mon Sep 17 00:00:00 2001 From: He Liu Date: Wed, 27 Oct 2021 12:00:30 -0700 Subject: [PATCH 01/69] Disable HA in DataLossRecovery test, and reenabled the test. --- tests/CMakeLists.txt | 2 +- tests/fast/DataLossRecovery.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 0215a95cd5a..56d12d77a57 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -134,7 +134,7 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES fast/CycleAndLock.toml) add_fdb_test(TEST_FILES fast/CycleTest.toml) add_fdb_test(TEST_FILES fast/ChangeFeeds.toml) - add_fdb_test(TEST_FILES fast/DataLossRecovery.toml IGNORE) # TODO Re-enable once failures are fixed + add_fdb_test(TEST_FILES fast/DataLossRecovery.toml) add_fdb_test(TEST_FILES fast/FuzzApiCorrectness.toml) add_fdb_test(TEST_FILES fast/FuzzApiCorrectnessClean.toml) add_fdb_test(TEST_FILES fast/IncrementalBackup.toml) diff --git a/tests/fast/DataLossRecovery.toml b/tests/fast/DataLossRecovery.toml index 6cebd91b979..d45fc5056c6 100644 --- a/tests/fast/DataLossRecovery.toml +++ b/tests/fast/DataLossRecovery.toml @@ -1,4 +1,5 @@ [configuration] +generateFearless = false config = 'triple' storageEngineType = 0 processesPerMachine = 2 From 628317b3b5381d4817c8037d891620e38d5ee91c Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Wed, 27 Oct 2021 15:51:21 -0700 Subject: [PATCH 02/69] Separate out memory benchmarks in flowbench --- flowbench/BenchHash.cpp | 27 ---------------------- flowbench/BenchMem.cpp | 48 ++++++++++++++++++++++++++++++++++++++++ flowbench/CMakeLists.txt | 3 ++- 3 files changed, 50 insertions(+), 28 deletions(-) create mode 100644 flowbench/BenchMem.cpp diff --git a/flowbench/BenchHash.cpp b/flowbench/BenchHash.cpp index 2cb0bf428b2..e5a2fd8401c 100644 --- a/flowbench/BenchHash.cpp +++ b/flowbench/BenchHash.cpp @@ -67,30 +67,3 @@ static void bench_hash(benchmark::State& state) { 
BENCHMARK_TEMPLATE(bench_hash, HashType::CRC32C)->DenseRange(2, 18)->ReportAggregatesOnly(true); BENCHMARK_TEMPLATE(bench_hash, HashType::HashLittle2)->DenseRange(2, 18)->ReportAggregatesOnly(true); BENCHMARK_TEMPLATE(bench_hash, HashType::XXHash3)->DenseRange(2, 18)->ReportAggregatesOnly(true); - -static void bench_memcmp(benchmark::State& state) { - constexpr int kLength = 10000; - std::unique_ptr b1{ new char[kLength] }; - std::unique_ptr b2{ new char[kLength] }; - memset(b1.get(), 0, kLength); - memset(b2.get(), 0, kLength); - b2.get()[kLength - 1] = 1; - - while (state.KeepRunning()) { - benchmark::DoNotOptimize(memcmp(b1.get(), b2.get(), kLength)); - } -} - -static void bench_memcpy(benchmark::State& state) { - constexpr int kLength = 10000; - std::unique_ptr b1{ new char[kLength] }; - std::unique_ptr b2{ new char[kLength] }; - memset(b1.get(), 0, kLength); - - while (state.KeepRunning()) { - benchmark::DoNotOptimize(memcpy(b2.get(), b1.get(), kLength)); - } -} - -BENCHMARK(bench_memcmp); -BENCHMARK(bench_memcpy); \ No newline at end of file diff --git a/flowbench/BenchMem.cpp b/flowbench/BenchMem.cpp new file mode 100644 index 00000000000..52b8e0c493b --- /dev/null +++ b/flowbench/BenchMem.cpp @@ -0,0 +1,48 @@ +/* + * BenchMem.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2020 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "benchmark/benchmark.h" + +static void bench_memcmp(benchmark::State& state) { + constexpr int kLength = 10000; + std::unique_ptr b1{ new char[kLength] }; + std::unique_ptr b2{ new char[kLength] }; + memset(b1.get(), 0, kLength); + memset(b2.get(), 0, kLength); + b2.get()[kLength - 1] = 1; + + while (state.KeepRunning()) { + benchmark::DoNotOptimize(memcmp(b1.get(), b2.get(), kLength)); + } +} + +static void bench_memcpy(benchmark::State& state) { + constexpr int kLength = 10000; + std::unique_ptr b1{ new char[kLength] }; + std::unique_ptr b2{ new char[kLength] }; + memset(b1.get(), 0, kLength); + + while (state.KeepRunning()) { + benchmark::DoNotOptimize(memcpy(b2.get(), b1.get(), kLength)); + } +} + +BENCHMARK(bench_memcmp); +BENCHMARK(bench_memcpy); diff --git a/flowbench/CMakeLists.txt b/flowbench/CMakeLists.txt index 8caad0ce02e..0a8582afc1d 100644 --- a/flowbench/CMakeLists.txt +++ b/flowbench/CMakeLists.txt @@ -1,8 +1,9 @@ set(FLOWBENCH_SRCS flowbench.actor.cpp - BenchMetadataCheck.cpp BenchHash.cpp BenchIterate.cpp + BenchMem.cpp + BenchMetadataCheck.cpp BenchPopulate.cpp BenchRandom.cpp BenchRef.cpp From 2232ba0a3f3b7c0c5c5fd16c119d2bbdf479a2a2 Mon Sep 17 00:00:00 2001 From: QA Hoang Date: Tue, 26 Oct 2021 17:05:12 -0700 Subject: [PATCH 03/69] fixed merging from PR 5710 --- bindings/c/test/mako/mako.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bindings/c/test/mako/mako.c b/bindings/c/test/mako/mako.c index dc429aa78cf..f2027c42178 100644 --- a/bindings/c/test/mako/mako.c +++ b/bindings/c/test/mako/mako.c @@ -1651,11 +1651,12 @@ int parse_args(int argc, char* argv[], mako_args_t* args) { int c; int idx; while (1) { - const char* short_options = "a:c:p:t:r:s:i:x:v:m:hz"; + const char* short_options = "a:c:d:p:t:r:s:i:x:v:m:hz"; static struct option long_options[] = { /* name, has_arg, flag, val */ { "api_version", required_argument, NULL, 'a' }, { "cluster", required_argument, NULL, 'c' }, + { "num_databases", 
optional_argument, NULL, 'd' }, { "procs", required_argument, NULL, 'p' }, { "threads", required_argument, NULL, 't' }, { "rows", required_argument, NULL, 'r' }, @@ -2525,7 +2526,7 @@ int stats_process_main(mako_args_t* args, fprintf(fp, "\"value_length\": %d,", args->value_length); fprintf(fp, "\"commit_get\": %d,", args->commit_get); fprintf(fp, "\"verbose\": %d,", args->verbose); - fprintf(fp, "\"cluster_file\": \"%s\",", args->cluster_files); + fprintf(fp, "\"cluster_files\": \"%s\",", args->cluster_files); fprintf(fp, "\"log_group\": \"%s\",", args->log_group); fprintf(fp, "\"prefixpadding\": %d,", args->prefixpadding); fprintf(fp, "\"trace\": %d,", args->trace); From 6281e647784e74dccb3a6cb88efb9d8b9cccd376 Mon Sep 17 00:00:00 2001 From: QA Hoang Date: Tue, 26 Oct 2021 17:16:06 -0700 Subject: [PATCH 04/69] updated mako documentation --- bindings/c/test/mako/mako.rst | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/bindings/c/test/mako/mako.rst b/bindings/c/test/mako/mako.rst index 8a63944869c..d1e978bf4dc 100644 --- a/bindings/c/test/mako/mako.rst +++ b/bindings/c/test/mako/mako.rst @@ -39,7 +39,11 @@ Arguments | - ``run``: Run the benchmark - | ``-c | --cluster `` - | FDB cluster file (Required) + | FDB cluster files (Required, comma-separated) + +- | ``-d | --num_databases `` + | Number of database objects (Default: 1) + | If more than 1 cluster is provided, this value should be >= number of cluster - | ``-a | --api_version `` | FDB API version to use (Default: Latest) @@ -110,6 +114,13 @@ Arguments | - 2 – Annoying | - 3 – Very Annoying (a.k.a. 
DEBUG) +- | ``--disable_ryw`` + | Disable snapshot read-your-writes + +- | ``--json_report`` defaults to ``mako.json`` + | ``--json_report=PATH`` + | Output stats to the specified json file + Transaction Specification ========================= From dbf7f9b04721cc90bd2851fb86b18d57bff94760 Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Thu, 28 Oct 2021 10:22:50 -0700 Subject: [PATCH 05/69] Add some includes to BenchMem.cpp --- flowbench/BenchMem.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flowbench/BenchMem.cpp b/flowbench/BenchMem.cpp index 52b8e0c493b..5373353fb0c 100644 --- a/flowbench/BenchMem.cpp +++ b/flowbench/BenchMem.cpp @@ -18,6 +18,9 @@ * limitations under the License. */ +#include +#include + #include "benchmark/benchmark.h" static void bench_memcmp(benchmark::State& state) { From 4d8ee2ed33bb80a54836980e11e0b6335e32089a Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Thu, 28 Oct 2021 16:38:04 -0700 Subject: [PATCH 06/69] fix: simple recruitment could succeed with less than the required replication factor --- fdbserver/ClusterController.actor.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 298b7867f5c..d6a7b5d9d29 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -904,6 +904,10 @@ class ClusterControllerData { } } + if (resultSet.size() < required) { + throw no_more_servers(); + } + // Continue adding workers to the result set until we reach the desired number of workers for (auto workerIter = fitness_workers.begin(); workerIter != fitness_workers.end() && resultSet.size() < desired; From ddf235713e3a8b65db712d52c3fe5984aa7b1903 Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Thu, 28 Oct 2021 16:40:30 -0700 Subject: [PATCH 07/69] strengthen assert --- fdbserver/ClusterController.actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index d6a7b5d9d29..1456319a3fe 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -926,7 +926,7 @@ class ClusterControllerData { } } - ASSERT(resultSet.size() <= desired); + ASSERT(resultSet.size() >= required && resultSet.size() <= desired); for (auto& result : resultSet) { id_used[result.interf.locality.processId()]++; From fd0aeaf48ebdc3ab4917149a82f626b95b5712da Mon Sep 17 00:00:00 2001 From: John Brownlee Date: Sat, 21 Aug 2021 20:40:20 -0700 Subject: [PATCH 08/69] Add a new process launcher for FDB on Kube. --- .../.testdata/default_config.json | 36 +++ fdbkubernetesmonitor/.testdata/fdb.cluster | 1 + fdbkubernetesmonitor/.testdata/test_env.sh | 5 + fdbkubernetesmonitor/config.go | 137 ++++++++++ fdbkubernetesmonitor/config_test.go | 102 ++++++++ fdbkubernetesmonitor/go.mod | 27 ++ fdbkubernetesmonitor/go.sum | 6 + fdbkubernetesmonitor/main.go | 41 +++ fdbkubernetesmonitor/monitor.go | 238 ++++++++++++++++++ 9 files changed, 593 insertions(+) create mode 100644 fdbkubernetesmonitor/.testdata/default_config.json create mode 100644 fdbkubernetesmonitor/.testdata/fdb.cluster create mode 100644 fdbkubernetesmonitor/.testdata/test_env.sh create mode 100644 fdbkubernetesmonitor/config.go create mode 100644 fdbkubernetesmonitor/config_test.go create mode 100644 fdbkubernetesmonitor/go.mod create mode 100644 fdbkubernetesmonitor/go.sum create mode 100644 fdbkubernetesmonitor/main.go create mode 100644 fdbkubernetesmonitor/monitor.go diff --git a/fdbkubernetesmonitor/.testdata/default_config.json b/fdbkubernetesmonitor/.testdata/default_config.json new file mode 100644 index 00000000000..ecb09eca28d --- /dev/null +++ b/fdbkubernetesmonitor/.testdata/default_config.json @@ -0,0 +1,36 @@ +{ + "version": "6.3.0", + "arguments": [ + {"value": "--cluster_file"}, + {"value": ".testdata/fdb.cluster"}, + {"value": "--public_address"}, + {"type": 
"Concatenate", "values": [ + {"type": "Environment", "source": "FDB_PUBLIC_IP"}, + {"value": ":"}, + {"type": "ProcessNumber", "offset": 4499, "multiplier": 2} + ]}, + {"value": "--listen_address"}, + {"type": "Concatenate", "values": [ + {"type": "Environment", "source": "FDB_POD_IP"}, + {"value": ":"}, + {"type": "ProcessNumber", "offset": 4499, "multiplier": 2} + ]}, + {"value": "--datadir"}, + {"type": "Concatenate", "values": [ + {"value": ".testdata/data/"}, + {"type": "ProcessNumber"} + ]}, + {"value": "--class"}, + {"value": "storage"}, + {"value": "--locality_zoneid"}, + {"type": "Environment", "source": "FDB_ZONE_ID"}, + {"value": "--locality_instance-id"}, + {"type": "Environment", "source": "FDB_INSTANCE_ID"}, + {"value": "--locality_process-id"}, + {"type": "Concatenate", "values": [ + {"type": "Environment", "source": "FDB_INSTANCE_ID"}, + {"value": "-"}, + {"type": "ProcessNumber"} + ]} + ] +} diff --git a/fdbkubernetesmonitor/.testdata/fdb.cluster b/fdbkubernetesmonitor/.testdata/fdb.cluster new file mode 100644 index 00000000000..4b364771734 --- /dev/null +++ b/fdbkubernetesmonitor/.testdata/fdb.cluster @@ -0,0 +1 @@ +test:test@127.0.0.1:4501 diff --git a/fdbkubernetesmonitor/.testdata/test_env.sh b/fdbkubernetesmonitor/.testdata/test_env.sh new file mode 100644 index 00000000000..ced881d3471 --- /dev/null +++ b/fdbkubernetesmonitor/.testdata/test_env.sh @@ -0,0 +1,5 @@ +export FDB_PUBLIC_IP=127.0.0.1 +export FDB_POD_IP=127.0.0.1 +export FDB_ZONE_ID=localhost +export FDB_MACHINE_ID=localhost +export FDB_INSTANCE_ID=storage-1 diff --git a/fdbkubernetesmonitor/config.go b/fdbkubernetesmonitor/config.go new file mode 100644 index 00000000000..2c89826cb2d --- /dev/null +++ b/fdbkubernetesmonitor/config.go @@ -0,0 +1,137 @@ +// config.go +// +// This source file is part of the FoundationDB open source project +// +// Copyright 2021 Apple Inc. 
and the FoundationDB project authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package main + +import ( + "fmt" + "os" +) + +// ProcessConfiguration models the configuration for starting a FoundationDB +// process. +type ProcessConfiguration struct { + // Version provides the version of FoundationDB the process should run. + Version string `json:"version"` + + // ServerCount defines the number of processes to start. + ServerCount int `json:"serverCount,omitempty"` + + // Arguments provides the arguments to the process. + Arguments []Argument `json:"arguments,omitempty"` +} + +// Argument defines an argument to the process. +type Argument struct { + // ArgumentType determines how the value is generated. + ArgumentType ArgumentType `json:"type,omitempty"` + + // Value provides the value for a Literal type argument. + Value string `json:"value,omitempty"` + + // Values provides the sub-values for a Concatenate type argument. + Values []Argument `json:"values,omitempty"` + + // Source provides the name of the environment variable to use for an + // Environment type argument. + Source string `json:"source,omitempty"` + + // Multiplier provides a multiplier for the process number for ProcessNumber + // type arguments. + Multiplier int `json:"multiplier,omitempty"` + + // Offset provides an offset to add to the process number for ProcessNumber + // type arguments.
+ Offset int `json:"offset,omitempty"` +} + +// ArgumentType defines the types for arguments. +type ArgumentType string + +const ( + // LiteralArgumentType defines an argument with a literal string value. + LiteralArgumentType ArgumentType = "Literal" + + // ConcatenateArgumentType defines an argument composed of other arguments. + ConcatenateArgumentType = "Concatenate" + + // EnvironmentArgumentType defines an argument that is pulled from an + // environment variable. + EnvironmentArgumentType = "Environment" + + // ProcessNumberArgumentType defines an argument that is calculated using + // the number of the process in the process list. + ProcessNumberArgumentType = "ProcessNumber" +) + +// GenerateArgument processes an argument and generates its string +// representation. +func (argument Argument) GenerateArgument(processNumber int, env map[string]string) (string, error) { + switch argument.ArgumentType { + case "": + fallthrough + case LiteralArgumentType: + return argument.Value, nil + case ConcatenateArgumentType: + concatenated := "" + for _, childArgument := range argument.Values { + childValue, err := childArgument.GenerateArgument(processNumber, env) + if err != nil { + return "", err + } + concatenated += childValue + } + return concatenated, nil + case ProcessNumberArgumentType: + number := processNumber + if argument.Multiplier != 0 { + number = number * argument.Multiplier + } + number = number + argument.Offset + return fmt.Sprintf("%d", number), nil + case EnvironmentArgumentType: + var value string + var present bool + if env != nil { + value, present = env[argument.Source] + } else { + value, present = os.LookupEnv(argument.Source) + } + if !present { + return "", fmt.Errorf("Missing environment variable %s", argument.Source) + } + return value, nil + default: + return "", fmt.Errorf("Unsupported argument type %s", argument.ArgumentType) + } +} + +// GenerateArguments intreprets the arguments in the process configuration and +// generates a 
command invocation. +func (configuration *ProcessConfiguration) GenerateArguments(processNumber int, env map[string]string) ([]string, error) { + results := make([]string, len(configuration.Arguments)) + for indexOfArgument, argument := range configuration.Arguments { + result, err := argument.GenerateArgument(processNumber, env) + if err != nil { + return nil, err + } + results[indexOfArgument] = result + } + return results, nil +} diff --git a/fdbkubernetesmonitor/config_test.go b/fdbkubernetesmonitor/config_test.go new file mode 100644 index 00000000000..9aca4043bfc --- /dev/null +++ b/fdbkubernetesmonitor/config_test.go @@ -0,0 +1,102 @@ +// config_test.go +// +// This source file is part of the FoundationDB open source project +// +// Copyright 2021 Apple Inc. and the FoundationDB project authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package main + +import ( + "encoding/json" + "os" + "reflect" + "testing" +) + +func loadConfigFromFile(path string) (*ProcessConfiguration, error) { + file, err := os.Open(path) + if err != nil { + return nil, err + } + defer file.Close() + decoder := json.NewDecoder(file) + config := &ProcessConfiguration{} + err = decoder.Decode(config) + if err != nil { + return nil, err + } + return config, nil +} + +func TestGeneratingArgumentsForDefaultConfig(t *testing.T) { + config, err := loadConfigFromFile(".testdata/default_config.json") + if err != nil { + t.Error(err) + return + } + + arguments, err := config.GenerateArguments(1, map[string]string{ + "FDB_PUBLIC_IP": "10.0.0.1", + "FDB_POD_IP": "192.168.0.1", + "FDB_ZONE_ID": "zone1", + "FDB_INSTANCE_ID": "storage-1", + }) + if err != nil { + t.Error(err) + return + } + + expectedArguments := []string{ + "--cluster_file", ".testdata/fdb.cluster", + "--public_address", "10.0.0.1:4501", "--listen_address", "192.168.0.1:4501", + "--datadir", ".testdata/data/1", "--class", "storage", + "--locality_zoneid", "zone1", "--locality_instance-id", "storage-1", + "--locality_process-id", "storage-1-1", + } + + if !reflect.DeepEqual(arguments, expectedArguments) { + t.Logf("Expected arguments %v, but got arguments %v", expectedArguments, arguments) + t.Fail() + } +} + +func TestGeneratingArgumentForEnvironmentVariable(t *testing.T) { + argument := Argument{ArgumentType: EnvironmentArgumentType, Source: "FDB_ZONE_ID"} + + result, err := argument.GenerateArgument(1, map[string]string{"FDB_ZONE_ID": "zone1", "FDB_MACHINE_ID": "machine1"}) + if err != nil { + t.Error(err) + return + } + if result != "zone1" { + t.Logf("Expected result zone1, but got result %v", result) + t.Fail() + return + } + + _, err = argument.GenerateArgument(1, map[string]string{"FDB_MACHINE_ID": "machine1"}) + if err == nil { + t.Logf("Expected error result, but did not get an error") + t.Fail() + return + } + expectedError := "Missing environment 
variable FDB_ZONE_ID" + if err.Error() != expectedError { + t.Logf("Expected error %s, but got error %s", expectedError, err) + t.Fail() + return + } +} diff --git a/fdbkubernetesmonitor/go.mod b/fdbkubernetesmonitor/go.mod new file mode 100644 index 00000000000..11722261683 --- /dev/null +++ b/fdbkubernetesmonitor/go.mod @@ -0,0 +1,27 @@ +// go.mod +// +// This source file is part of the FoundationDB open source project +// +// Copyright 2021 Apple Inc. and the FoundationDB project authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +module github.com/apple/foundationdb/fdbkubernetesmonitor + +go 1.16 + +require ( + github.com/spf13/pflag v1.0.5 + github.com/fsnotify/fsnotify v1.5.0 +) diff --git a/fdbkubernetesmonitor/go.sum b/fdbkubernetesmonitor/go.sum new file mode 100644 index 00000000000..e77e248c3e1 --- /dev/null +++ b/fdbkubernetesmonitor/go.sum @@ -0,0 +1,6 @@ +github.com/fsnotify/fsnotify v1.5.0 h1:NO5hkcB+srp1x6QmwvNZLeaOgbM8cmBTN32THzjvu2k= +github.com/fsnotify/fsnotify v1.5.0/go.mod h1:BX0DCEr5pT4jm2CnQdVP1lFV521fcCNcyEeNp4DQQDk= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c h1:F1jZWGFhYfh0Ci55sIpILtKKK8p3i2/krTr0H1rg74I= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/fdbkubernetesmonitor/main.go b/fdbkubernetesmonitor/main.go new file mode 100644 index 00000000000..7f82af04f23 --- /dev/null +++ b/fdbkubernetesmonitor/main.go @@ -0,0 +1,41 @@ +// main.go +// +// This source file is part of the FoundationDB open source project +// +// Copyright 2021 Apple Inc. and the FoundationDB project authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +package main + +import ( + "fmt" + + "github.com/spf13/pflag" +) + +var ( + inputDir string + fdbserverPath string + monitorConfFile string +) + +func main() { + pflag.StringVar(&fdbserverPath, "fdbserver-path", "/usr/bin/fdbserver", "Path to the fdbserver binary") + pflag.StringVar(&inputDir, "input-dir", ".", "Directory containing input files") + pflag.StringVar(&monitorConfFile, "input-monitor-conf", "config.json", "Name of the file in the input directory that contains the monitor configuration") + pflag.Parse() + + StartMonitor(fmt.Sprintf("%s/%s", inputDir, monitorConfFile), fdbserverPath) +} diff --git a/fdbkubernetesmonitor/monitor.go b/fdbkubernetesmonitor/monitor.go new file mode 100644 index 00000000000..08d8a9cf35a --- /dev/null +++ b/fdbkubernetesmonitor/monitor.go @@ -0,0 +1,238 @@ +// monitor.go +// +// This source file is part of the FoundationDB open source project +// +// Copyright 2021 Apple Inc. and the FoundationDB project authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package main + +import ( + "encoding/json" + "io" + "log" + "os" + "os/exec" + "os/signal" + "sync" + "syscall" + "time" + + "github.com/fsnotify/fsnotify" +) + +// errorBackoffSeconds is the time to wait after a process fails before starting +// another process. +const errorBackoffSeconds = 5 + +// Monitor provides the main monitor loop +type Monitor struct { + // ConfigFile defines the path to the config file to load. 
+ ConfigFile string + + // FDBServerPath defines the path to the fdbserver binary. + FDBServerPath string + + // ActiveConfiguration defines the active process configuration. + ActiveConfiguration *ProcessConfiguration + + // ActiveConfigurationBytes defines the source data for the active process + // configuration. + ActiveConfigurationBytes []byte + + // ProcessIDs stores the PIDs of the processes that are running. A PID of + // zero will indicate that a process does not have a run loop. A PID of -1 + // will indicate that a process has a run loop but is not currently running + // the subprocess. + ProcessesIDs []int + + // Mutex defines a mutex around working with configuration. + Mutex sync.Mutex +} + +// StartMonitor starts the monitor loop. +func StartMonitor(configFile string, fdbserverPath string) { + monitor := &Monitor{ConfigFile: configFile, FDBServerPath: fdbserverPath} + monitor.Run() +} + +// LoadConfiguration loads the latest configuration from the config file. +func (monitor *Monitor) LoadConfiguration() { + file, err := os.Open(monitor.ConfigFile) + if err != nil { + log.Print(err.Error()) + return + } + defer file.Close() + configuration := &ProcessConfiguration{} + configurationBytes, err := io.ReadAll(file) + if err != nil { + log.Print(err.Error()) + } + err = json.Unmarshal(configurationBytes, configuration) + if err != nil { + log.Print(err) + return + } + + _, err = configuration.GenerateArguments(1, nil) + if err != nil { + log.Print(err) + return + } + + log.Printf("Received new configuration file") + monitor.Mutex.Lock() + defer monitor.Mutex.Unlock() + + if configuration.ServerCount == 0 { + configuration.ServerCount = 1 + } + + if monitor.ProcessesIDs == nil { + monitor.ProcessesIDs = make([]int, configuration.ServerCount+1) + } else { + for len(monitor.ProcessesIDs) <= configuration.ServerCount { + monitor.ProcessesIDs = append(monitor.ProcessesIDs, 0) + } + } + + monitor.ActiveConfiguration = configuration + 
monitor.ActiveConfigurationBytes = configurationBytes + + for processNumber := 1; processNumber <= configuration.ServerCount; processNumber++ { + if monitor.ProcessesIDs[processNumber] == 0 { + monitor.ProcessesIDs[processNumber] = -1 + tempNumber := processNumber + go func() { monitor.RunProcess(tempNumber) }() + } + } +} + +// RunProcess runs a loop to continually start and watch a process. +func (monitor *Monitor) RunProcess(processNumber int) { + log.Printf("Starting run loop for subprocess %d", processNumber) + for { + arguments, err := monitor.ActiveConfiguration.GenerateArguments(processNumber, nil) + arguments = append([]string{monitor.FDBServerPath}, arguments...) + if err != nil { + log.Print(err) + time.Sleep(errorBackoffSeconds * time.Second) + } + cmd := exec.Cmd{ + Path: arguments[0], + Args: arguments, + Stdout: os.Stdout, + Stderr: os.Stderr, + } + + log.Printf("Starting subprocess #%d: %v", processNumber, arguments) + err = cmd.Start() + if err != nil { + log.Printf("Error from subprocess %d: %s", processNumber, err.Error()) + log.Printf("Subprocess #%d will restart in %d seconds", processNumber, errorBackoffSeconds) + time.Sleep(errorBackoffSeconds * time.Second) + continue + } + + monitor.Mutex.Lock() + monitor.ProcessesIDs[processNumber] = cmd.Process.Pid + monitor.Mutex.Unlock() + + err = cmd.Wait() + log.Printf("Subprocess #%d terminated", processNumber) + + if err != nil { + log.Printf("Error from subprocess #%d: %s", processNumber, err.Error()) + } + + monitor.Mutex.Lock() + monitor.ProcessesIDs[processNumber] = -1 + if monitor.ActiveConfiguration.ServerCount < processNumber { + log.Printf("Terminating run loop for subprocess %d", processNumber) + monitor.ProcessesIDs[processNumber] = 0 + monitor.Mutex.Unlock() + return + } + monitor.Mutex.Unlock() + + log.Printf("Subprocess #%d will restart in %d seconds", processNumber, errorBackoffSeconds) + time.Sleep(errorBackoffSeconds * time.Second) + } +} + +// WatchConfiguration detects changes to 
the monitor configuration file. +func (monitor *Monitor) WatchConfiguration(watcher *fsnotify.Watcher) { + for { + select { + case event, ok := <-watcher.Events: + if !ok { + return + } + log.Printf("Detected event on monitor conf file: %v", event) + if event.Op&fsnotify.Write == fsnotify.Write { + monitor.LoadConfiguration() + } + case err, ok := <-watcher.Errors: + if !ok { + return + } + log.Print(err) + } + } +} + +// Run runs the monitor loop. +func (monitor *Monitor) Run() { + done := make(chan bool, 1) + signals := make(chan os.Signal, 1) + signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM) + + go func() { + latestSignal := <-signals + log.Printf("Received signal %v", latestSignal) + for processNumber, processID := range monitor.ProcessesIDs { + if processID > 0 { + process, err := os.FindProcess(processID) + if err != nil { + log.Printf("Error finding subprocess #%d (PID %d): %s", processNumber, processID, err.Error()) + continue + } + log.Printf("Sending signal %v to subprocess #%d (PID %d)", latestSignal, processNumber, processID) + err = process.Signal(latestSignal) + if err != nil { + log.Printf("Error signaling subprocess #%d (PID %d): %s", processNumber, processID, err.Error()) + continue + } + } + } + done <- true + }() + + monitor.LoadConfiguration() + watcher, err := fsnotify.NewWatcher() + if err != nil { + panic(err) + } + err = watcher.Add(monitor.ConfigFile) + if err != nil { + panic(err) + } + + defer watcher.Close() + go func() { monitor.WatchConfiguration(watcher) }() + + <-done +} From 0f173edb47bfc633b1056d90e9c849f97829b41e Mon Sep 17 00:00:00 2001 From: John Brownlee Date: Sat, 21 Aug 2021 22:11:53 -0700 Subject: [PATCH 09/69] Add a dockerfile for fdb-kubernetes-monitor. 
--- fdbkubernetesmonitor/monitor.go | 27 ++-- packaging/docker/kubernetes/Dockerfile | 76 ++++++++++ packaging/docker/kubernetes/statefulset.yaml | 149 +++++++++++++++++++ 3 files changed, 241 insertions(+), 11 deletions(-) create mode 100644 packaging/docker/kubernetes/Dockerfile create mode 100644 packaging/docker/kubernetes/statefulset.yaml diff --git a/fdbkubernetesmonitor/monitor.go b/fdbkubernetesmonitor/monitor.go index 08d8a9cf35a..3949a406819 100644 --- a/fdbkubernetesmonitor/monitor.go +++ b/fdbkubernetesmonitor/monitor.go @@ -97,10 +97,6 @@ func (monitor *Monitor) LoadConfiguration() { monitor.Mutex.Lock() defer monitor.Mutex.Unlock() - if configuration.ServerCount == 0 { - configuration.ServerCount = 1 - } - if monitor.ProcessesIDs == nil { monitor.ProcessesIDs = make([]int, configuration.ServerCount+1) } else { @@ -125,6 +121,15 @@ func (monitor *Monitor) LoadConfiguration() { func (monitor *Monitor) RunProcess(processNumber int) { log.Printf("Starting run loop for subprocess %d", processNumber) for { + monitor.Mutex.Lock() + if monitor.ActiveConfiguration.ServerCount < processNumber { + log.Printf("Terminating run loop for subprocess %d", processNumber) + monitor.ProcessesIDs[processNumber] = 0 + monitor.Mutex.Unlock() + return + } + monitor.Mutex.Unlock() + arguments, err := monitor.ActiveConfiguration.GenerateArguments(processNumber, nil) arguments = append([]string{monitor.FDBServerPath}, arguments...) 
if err != nil { @@ -160,12 +165,6 @@ func (monitor *Monitor) RunProcess(processNumber int) { monitor.Mutex.Lock() monitor.ProcessesIDs[processNumber] = -1 - if monitor.ActiveConfiguration.ServerCount < processNumber { - log.Printf("Terminating run loop for subprocess %d", processNumber) - monitor.ProcessesIDs[processNumber] = 0 - monitor.Mutex.Unlock() - return - } monitor.Mutex.Unlock() log.Printf("Subprocess #%d will restart in %d seconds", processNumber, errorBackoffSeconds) @@ -182,7 +181,13 @@ func (monitor *Monitor) WatchConfiguration(watcher *fsnotify.Watcher) { return } log.Printf("Detected event on monitor conf file: %v", event) - if event.Op&fsnotify.Write == fsnotify.Write { + if event.Op&fsnotify.Write == fsnotify.Write || event.Op&fsnotify.Create == fsnotify.Create { + monitor.LoadConfiguration() + } else if event.Op&fsnotify.Remove == fsnotify.Remove { + err := watcher.Add(monitor.ConfigFile) + if err != nil { + panic(err) + } monitor.LoadConfiguration() } case err, ok := <-watcher.Errors: diff --git a/packaging/docker/kubernetes/Dockerfile b/packaging/docker/kubernetes/Dockerfile new file mode 100644 index 00000000000..6d8feab19c7 --- /dev/null +++ b/packaging/docker/kubernetes/Dockerfile @@ -0,0 +1,76 @@ +# Dockerfile +# +# This source file is part of the FoundationDB open source project +# +# Copyright 2021 Apple Inc. and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# This docker image assumes that the context for the docker build is pointed +# at the root of the foundationdb repository. + +# Build the Kubernetes monitor + +FROM golang:1.16.7-bullseye AS go-build + +COPY fdbkubernetesmonitor/ /fdbkubernetesmonitor +WORKDIR /fdbkubernetesmonitor +RUN go build -o /fdb-kubernetes-monitor ./... + +# Build the main image + +FROM ubuntu:18.04 + +RUN apt-get update && \ + apt-get install -y curl>=7.58.0-2ubuntu3.6 \ + dnsutils>=1:9.11.3+dfsg-1ubuntu1.7 \ + lsof>=4.89+dfsg-0.1 \ + tcptraceroute>=1.5beta7+debian-4build1 \ + telnet>=0.17-41 \ + netcat>=1.10-41.1 \ + strace>=4.21-1ubuntu1 \ + tcpdump>=4.9.3-0ubuntu0.18.04.1 \ + less>=487-0.1 \ + vim>=2:8.0.1453-1ubuntu1.4 \ + net-tools>=1.60+git20161116.90da8a0-1ubuntu1 \ + jq>=1.5+dfsg-2 && \ + rm -rf /var/lib/apt/lists/* + +ARG FDB_VERSION +ARG FDB_LIBRARY_VERSIONS="${FDB_VERSION}" +ARG FDB_WEBSITE=https://www.foundationdb.org + +COPY packaging/docker/website /mnt/website/ + +# Install FoundationDB Binaries +RUN mkdir -p /var/fdb/logs && mkdir -p /var/fdb/tmp && \ + curl $FDB_WEBSITE/downloads/$FDB_VERSION/linux/fdb_$FDB_VERSION.tar.gz | tar zxf - --strip-components=1 && \ + chmod u+x fdbbackup fdbcli fdbdr fdbmonitor fdbrestore fdbserver backup_agent dr_agent && \ + mv fdbbackup fdbcli fdbdr fdbmonitor fdbrestore fdbserver backup_agent dr_agent /usr/bin + +# Install additional FoundationDB Client Libraries +ADD packaging/docker/release/download_multiversion_libraries.bash /var/fdb/tmp +RUN bash /var/fdb/tmp/download_multiversion_libraries.bash $FDB_WEBSITE $FDB_LIBRARY_VERSIONS + +# Clean up temporary directories +RUN rm -rf /mnt/website && rm -r /var/fdb/tmp + +# Install the kubernetes monitor binary +COPY --from=go-build /fdb-kubernetes-monitor /usr/bin/ + +VOLUME /var/fdb/data + +# Runtime Configuration Options + +ENTRYPOINT ["/usr/bin/fdb-kubernetes-monitor"] diff --git a/packaging/docker/kubernetes/statefulset.yaml b/packaging/docker/kubernetes/statefulset.yaml new file 
mode 100644 index 00000000000..b8590dbc9e4 --- /dev/null +++ b/packaging/docker/kubernetes/statefulset.yaml @@ -0,0 +1,149 @@ +# statefulset.yaml +# +# This source file is part of the FoundationDB open source project +# +# Copyright 2021 Apple Inc. and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This file provides an example of using a statefulset to launch FDB processes +# using the foundationdb-kubernetes image. +# +# This is not a recommended way to run production clusters, but it can be useful +# to test the image in development. +# +# To start a cluster with this example, run the following steps: +# 1. Apply this file. +# 2. Wait for all pods to start. +# 3. Generate a connection string, using the following bash command: +# ips=$(kubectl get pod -l app=fdb-kubernetes-example -o json | jq -j '[[.items|.[]|select(.status.podIP!="")]|limit(3;.[])|.status.podIP+":4501"]|join(",")'); echo test:test@$ips +# echo test:test@$ips +# 4. Update the ConfigMap below to have the results of that echo statement as +# the `fdb.cluster` entry, and change the `serverCount` field to `1`. +# 5. Apply the file again. +# 6. Watch the logs for the fdb-kubernetes-example pods to confirm that they +# have launched the fdbserver processes. +# 7. Exec into one of the pods, and run `fdbcli --exec "configure new double ssd"`. 
+
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: fdb-kubernetes-example
+  labels:
+    app: fdb-kubernetes-example
+spec:
+  selector:
+    matchLabels:
+      app: fdb-kubernetes-example
+  replicas: 5
+  serviceName: fdb-kubernetes-example
+  template:
+    metadata:
+      labels:
+        app: fdb-kubernetes-example
+    spec:
+      containers:
+        - name: foundationdb
+          image: foundationdb/foundationdb-kubernetes:6.3.15
+          env:
+            - name: FDB_CLUSTER_FILE
+              value: /var/fdb/data/fdb.cluster
+            - name: FDB_PUBLIC_IP
+              valueFrom:
+                fieldRef:
+                  fieldPath: status.podIP
+            - name: FDB_POD_IP
+              valueFrom:
+                fieldRef:
+                  fieldPath: status.podIP
+            - name: FDB_MACHINE_ID
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: FDB_ZONE_ID
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+            - name: FDB_INSTANCE_ID
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+          args:
+            - --input-dir
+            - /var/fdb/dynamic-conf
+          volumeMounts:
+            - name: dynamic-conf
+              mountPath: /var/fdb/dynamic-conf
+            - name: data
+              mountPath: /var/fdb/data
+      volumes:
+        - name: dynamic-conf
+          configMap:
+            name: fdb-kubernetes-example-config
+  volumeClaimTemplates:
+    - metadata:
+        name: data
+      spec:
+        accessModes:
+          - ReadWriteOnce
+        resources:
+          requests:
+            storage: 1Gi
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: fdb-kubernetes-example-config
+data:
+  fdb.cluster: ""
+  config.json: |
+    {
+      "serverCount": 0,
+      "version": "6.3.15",
+      "arguments": [
+        {"value": "--cluster_file"},
+        {"value": "/var/fdb/data/fdb.cluster"},
+        {"value": "--seed_cluster_file"},
+        {"value": "/var/fdb/dynamic-conf/fdb.cluster"},
+        {"value": "--public_address"},
+        {"type": "Concatenate", "values": [
+          {"type": "Environment", "source": "FDB_PUBLIC_IP"},
+          {"value": ":"},
+          {"type": "ProcessNumber", "offset": 4499, "multiplier": 2}
+        ]},
+        {"value": "--listen_address"},
+        {"type": "Concatenate", "values": [
+          {"type": "Environment", "source": "FDB_POD_IP"},
+          {"value": ":"},
+          {"type": "ProcessNumber", "offset": 4499, "multiplier": 2}
+        ]},
+        
{"value": "--datadir"}, + {"type": "Concatenate", "values": [ + {"value": "/var/fdb/data/"}, + {"type": "ProcessNumber"} + ]}, + {"value": "--class"}, + {"value": "storage"}, + {"value": "--locality_zoneid"}, + {"type": "Environment", "source": "FDB_ZONE_ID"}, + {"value": "--locality_instance-id"}, + {"type": "Environment", "source": "FDB_INSTANCE_ID"}, + {"value": "--locality_process-id"}, + {"type": "Concatenate", "values": [ + {"type": "Environment", "source": "FDB_INSTANCE_ID"}, + {"value": "-"}, + {"type": "ProcessNumber"} + ]} + ] + } From 95ad5854beb1348e3d049325b44881d9d7ece085 Mon Sep 17 00:00:00 2001 From: John Brownlee Date: Sun, 22 Aug 2021 01:28:09 -0700 Subject: [PATCH 10/69] Add a mechanism to post updates from fdb-kubernetes-monitor to pod annotations. Remove some of the local test data for fdb-kubernetes-monitor in favor of testing through a Kuberentes statefulset. --- .../.testdata/default_config.json | 1 + fdbkubernetesmonitor/.testdata/fdb.cluster | 1 - fdbkubernetesmonitor/.testdata/test_env.sh | 5 - fdbkubernetesmonitor/README.md | 25 ++ fdbkubernetesmonitor/go.mod | 5 +- fdbkubernetesmonitor/go.sum | 413 ++++++++++++++++++ fdbkubernetesmonitor/kubernetes.go | 185 ++++++++ fdbkubernetesmonitor/monitor.go | 35 +- .../{statefulset.yaml => config.yaml} | 61 ++- 9 files changed, 708 insertions(+), 23 deletions(-) delete mode 100644 fdbkubernetesmonitor/.testdata/fdb.cluster delete mode 100644 fdbkubernetesmonitor/.testdata/test_env.sh create mode 100644 fdbkubernetesmonitor/README.md create mode 100644 fdbkubernetesmonitor/kubernetes.go rename packaging/docker/kubernetes/{statefulset.yaml => config.yaml} (79%) diff --git a/fdbkubernetesmonitor/.testdata/default_config.json b/fdbkubernetesmonitor/.testdata/default_config.json index ecb09eca28d..86cb8361644 100644 --- a/fdbkubernetesmonitor/.testdata/default_config.json +++ b/fdbkubernetesmonitor/.testdata/default_config.json @@ -1,5 +1,6 @@ { "version": "6.3.0", + "serverCount": 1, "arguments": [ 
{"value": "--cluster_file"}, {"value": ".testdata/fdb.cluster"}, diff --git a/fdbkubernetesmonitor/.testdata/fdb.cluster b/fdbkubernetesmonitor/.testdata/fdb.cluster deleted file mode 100644 index 4b364771734..00000000000 --- a/fdbkubernetesmonitor/.testdata/fdb.cluster +++ /dev/null @@ -1 +0,0 @@ -test:test@127.0.0.1:4501 diff --git a/fdbkubernetesmonitor/.testdata/test_env.sh b/fdbkubernetesmonitor/.testdata/test_env.sh deleted file mode 100644 index ced881d3471..00000000000 --- a/fdbkubernetesmonitor/.testdata/test_env.sh +++ /dev/null @@ -1,5 +0,0 @@ -export FDB_PUBLIC_IP=127.0.0.1 -export FDB_POD_IP=127.0.0.1 -export FDB_ZONE_ID=localhost -export FDB_MACHINE_ID=localhost -export FDB_INSTANCE_ID=storage-1 diff --git a/fdbkubernetesmonitor/README.md b/fdbkubernetesmonitor/README.md new file mode 100644 index 00000000000..67e20401d85 --- /dev/null +++ b/fdbkubernetesmonitor/README.md @@ -0,0 +1,25 @@ + +This package provides a launcher program for running FoundationDB in Kubernetes. + +To test this, run the following commands from the root of the FoundationDB +repository: + + docker build -t foundationdb/foundationdb-kubernetes:latest --build-arg FDB_VERSION=6.3.15 --build-arg FDB_LIBRARY_VERSIONS="6.3.15 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile . + kubectl apply -f packaging/docker/kubernetes/config.yaml + # Wait for the pods to become ready + ips=$(kubectl get pod -l app=fdb-kubernetes-example -o json | jq -j '[[.items|.[]|select(.status.podIP!="")]|limit(3;.[])|.status.podIP+":4501"]|join(",")') + cat packaging/docker/kubernetes/config.yaml | sed -e "s/fdb.cluster: \"\"/fdb.cluster: \"test:test@$ips\"/" -e "s/\"serverCount\": 0/\"serverCount\": 1/" | kubectl apply -f - + kubectl get pod -l app=fdb-kubernetes-example -o name | xargs -I {} kubectl annotate {} foundationdb.org/outdated-config-map-seen=$(date +%s) --overwrite + # Watch the logs for the fdb-kubernetes-example pods to confirm that they have launched the fdbserver processes. 
+ kubectl exec -it sts/fdb-kubernetes-example -- fdbcli --exec "configure new double ssd" + +You can then make changes to the data in the config map and update the fdbserver processes: + + kubectl apply -f packaging/docker/kubernetes/config.yaml + kubectl get pod -l app=fdb-kubernetes-example -o name | xargs -I {} kubectl annotate {} foundationdb.org/outdated-config-map-seen=$(date +%s) --overwrite + # Watch the logs for the fdb-kubernetes-example pods to confirm that they have launched the fdbserver processes. + kubectl exec -it sts/fdb-kubernetes-example -- fdbcli --exec "kill; kill all; status" + +Once you are done, you can tear down the example with the following command: + + kubectl delete -f packaging/docker/kubernetes/config.yaml; kubectl delete pvc -l app=fdb-kubernetes-example diff --git a/fdbkubernetesmonitor/go.mod b/fdbkubernetesmonitor/go.mod index 11722261683..d44f296e555 100644 --- a/fdbkubernetesmonitor/go.mod +++ b/fdbkubernetesmonitor/go.mod @@ -22,6 +22,9 @@ module github.com/apple/foundationdb/fdbkubernetesmonitor go 1.16 require ( - github.com/spf13/pflag v1.0.5 github.com/fsnotify/fsnotify v1.5.0 + github.com/spf13/pflag v1.0.5 + k8s.io/api v0.20.2 + k8s.io/apimachinery v0.20.2 + k8s.io/client-go v0.20.2 ) diff --git a/fdbkubernetesmonitor/go.sum b/fdbkubernetesmonitor/go.sum index e77e248c3e1..c0378c9592a 100644 --- a/fdbkubernetesmonitor/go.sum +++ b/fdbkubernetesmonitor/go.sum @@ -1,6 +1,419 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= +cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= +cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= +cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= +cloud.google.com/go v0.46.3/go.mod 
h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= +cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= +cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= +cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= +cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= +cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= +cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= +cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= +cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= +cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= +cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= +cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= +cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= +cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= +cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= +cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= +dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= +github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= +github.com/Azure/go-autorest/autorest v0.11.1/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw= +github.com/Azure/go-autorest/autorest/adal v0.9.0/go.mod h1:/c022QCutn2P7uY+/oQWWNcK9YU+MH96NgK+jErpbcg= +github.com/Azure/go-autorest/autorest/adal v0.9.5/go.mod h1:B7KF7jKIeC9Mct5spmyCB/A8CG/sEz1vwIRGv/bbw7A= +github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod 
h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74= +github.com/Azure/go-autorest/autorest/mocks v0.4.0/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= +github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= +github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= +github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= +github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= +github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= +github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod 
h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM= +github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= +github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= +github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.5.0 h1:NO5hkcB+srp1x6QmwvNZLeaOgbM8cmBTN32THzjvu2k= github.com/fsnotify/fsnotify v1.5.0/go.mod h1:BX0DCEr5pT4jm2CnQdVP1lFV521fcCNcyEeNp4DQQDk= +github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= +github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= +github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= +github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= +github.com/go-logr/logr v0.2.0 h1:QvGt2nLcHH0WK9orKa+ppBPAxREcH364nPUedEpK0TY= 
+github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= +github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg= +github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= +github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc= +github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= +github.com/go-openapi/spec v0.19.3/go.mod h1:FpwSN1ksY1eteniUU7X0N/BgJ7a4WvBFVA8Lj9mJglo= +github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= +github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= +github.com/gogo/protobuf v1.3.1 h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls= +github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= +github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= +github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod 
h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.3 h1:JjCZWpVbqXDqFVmTfYWEVTMIYrL/NPdPSCHPJ0T/raM= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.2 h1:X2ev0eStA3AbceY54o37/0PQ/UWqKEiiO2dKL5OPaFM= +github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/gofuzz v1.0.0/go.mod 
h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.1.0 h1:Hsa8mG0dQ46ij8Sl2AYJDUv1oA9/d6Vk+3LG99Oe02g= +github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= +github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= +github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/googleapis/gnostic v0.4.1 h1:DLJCy1n/vrD4HPjOvYcT8aYQXpPIzoRZONaYwyycI+I= +github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3ir6b65WBswg= +github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= +github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= +github.com/ianlancetaylor/demangle 
v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68= +github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= +github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= +github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs= +github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/pty v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= +github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= +github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= +github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= +github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= +github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5 
h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= +go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= +go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0 h1:hb9wdF1z5waM+dSIICn1l0DkLVDT3hqhhQsDNUmHPRE= +golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp 
v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= +golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= +golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= +golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= +golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= +golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= +golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod 
h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= +golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= +golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= +golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= +golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net 
v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b h1:uwuIcX0g4Yl1NC5XAz37xsr2lTtcqevgzYNVt49waME= +golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d 
h1:TzXSXBo42m9gQenoE3b9BGiEpg5IG2JkU5FkPIawgtw= +golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201112073958-5cba982894dd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c h1:F1jZWGFhYfh0Ci55sIpILtKKK8p3i2/krTr0H1rg74I= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod 
h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.4 h1:0YWbFKbhXG/wIiuHDSKpS0Iy7FSA+u45VtBMfQcFTTc= +golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e h1:EHBhcS0mlXEAVwNyO2dLfjToGsyY4j24pTs2ScHnX7s= +golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod 
h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190614205625-5aca471b1d59/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod 
h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= +google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= +google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine 
v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= +google.golang.org/appengine v1.6.5 h1:tycE03LOZYQNhDpS27tcQdAzLCVMaj7QT2SXxebnpCM= +google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= +google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= 
+google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= +google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= +google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf 
v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools 
v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +k8s.io/api v0.20.2 h1:y/HR22XDZY3pniu9hIFDLpUCPq2w5eQ6aV/VFQ7uJMw= +k8s.io/api v0.20.2/go.mod h1:d7n6Ehyzx+S+cE3VhTGfVNNqtGc/oL9DCdYYahlurV8= +k8s.io/apimachinery v0.20.2 h1:hFx6Sbt1oG0n6DZ+g4bFt5f6BoMkOjKWsQFu077M3Vg= +k8s.io/apimachinery v0.20.2/go.mod h1:WlLqWAHZGg07AeltaI0MV5uk1Omp8xaN0JGLY6gkRpU= +k8s.io/client-go v0.20.2 h1:uuf+iIAbfnCSw8IGAv/Rg0giM+2bOzHLOsbbrwrdhNQ= +k8s.io/client-go v0.20.2/go.mod h1:kH5brqWqp7HDxUFKoEgiI4v8G1xzbe9giaCenUWJzgE= +k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0= +k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= +k8s.io/klog/v2 v2.4.0 h1:7+X0fUguPyrKEC4WjH8iGDg3laWgMo5tMnRTIGTTxGQ= +k8s.io/klog/v2 v2.4.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= +k8s.io/kube-openapi v0.0.0-20201113171705-d219536bb9fd/go.mod h1:WOJ3KddDSol4tAGcJo0Tvi+dK12EcqSLqcWsryKMpfM= +k8s.io/utils v0.0.0-20201110183641-67b214c5f920 h1:CbnUZsM497iRC5QMVkHwyl8s2tB3g7yaSHkYPkpgelw= +k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= +rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= +rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= +rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= +sigs.k8s.io/structured-merge-diff/v4 v4.0.2 h1:YHQV7Dajm86OuqnIR6zAelnDWBRjo+YhYV9PmGrh1s8= +sigs.k8s.io/structured-merge-diff/v4 v4.0.2/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw= +sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= +sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q= +sigs.k8s.io/yaml v1.2.0/go.mod 
h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= diff --git a/fdbkubernetesmonitor/kubernetes.go b/fdbkubernetesmonitor/kubernetes.go new file mode 100644 index 00000000000..63e9af76b09 --- /dev/null +++ b/fdbkubernetesmonitor/kubernetes.go @@ -0,0 +1,185 @@ +// kubernetes.go +// +// This source file is part of the FoundationDB open source project +// +// Copyright 2021 Apple Inc. and the FoundationDB project authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package main + +import ( + "context" + "encoding/json" + "fmt" + "log" + "os" + "strconv" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/kubernetes" + typedv1 "k8s.io/client-go/kubernetes/typed/core/v1" + "k8s.io/client-go/rest" +) + +const ( + // CurrentConfigurationAnnotation is the annotation we use to store the + // latest configuration. + CurrentConfigurationAnnotation = "foundationdb.org/launcher-current-configuration" + + // EnvironmentAnnotation is the annotation we use to store the environment + // variables. + EnvironmentAnnotation = "foundationdb.org/launcher-environment" + + // OutdatedConfigMapAnnotation is the annotation we read to get notified of + // outdated configuration. + OutdatedConfigMapAnnotation = "foundationdb.org/outdated-config-map-seen" +) + +// PodClient is a wrapper around the pod API. 
+type PodClient struct { + // podApi is the raw API + podApi typedv1.PodInterface + + // pod is the latest pod configuration + pod *corev1.Pod + + // TimestampFeed is a channel where the pod client will send updates with + // the values from OutdatedConfigMapAnnotation. + TimestampFeed chan int64 +} + +// CreatePodClient creates a new client for working with the pod object. +func CreatePodClient() (*PodClient, error) { + config, err := rest.InClusterConfig() + if err != nil { + return nil, err + } + client, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, err + } + + podApi := client.CoreV1().Pods(os.Getenv("FDB_POD_NAMESPACE")) + pod, err := podApi.Get(context.Background(), os.Getenv("FDB_POD_NAME"), metav1.GetOptions{ResourceVersion: "0"}) + if err != nil { + return nil, err + } + + podClient := &PodClient{podApi: podApi, pod: pod, TimestampFeed: make(chan int64, 10)} + err = podClient.watchPod() + if err != nil { + return nil, err + } + + return podClient, nil +} + +// retrieveEnvironmentVariables extracts the environment variables we have for +// an argument into a map. +func retrieveEnvironmentVariables(argument Argument, target map[string]string) { + if argument.Source != "" { + target[argument.Source] = os.Getenv(argument.Source) + } + if argument.Values != nil { + for _, childArgument := range argument.Values { + retrieveEnvironmentVariables(childArgument, target) + } + } +} + +// UpdateAnnotations updates annotations on the pod after loading new +// configuration. 
+func (client *PodClient) UpdateAnnotations(monitor *Monitor) error { + environment := make(map[string]string) + for _, argument := range monitor.ActiveConfiguration.Arguments { + retrieveEnvironmentVariables(argument, environment) + } + jsonEnvironment, err := json.Marshal(environment) + if err != nil { + return err + } + + patch := map[string]interface{}{ + "metadata": map[string]interface{}{ + "annotations": map[string]string{ + CurrentConfigurationAnnotation: string(monitor.ActiveConfigurationBytes), + EnvironmentAnnotation: string(jsonEnvironment), + }, + }, + } + + patchJson, err := json.Marshal(patch) + if err != nil { + return err + } + + pod, err := client.podApi.Patch(context.Background(), client.pod.Name, types.MergePatchType, patchJson, metav1.PatchOptions{}) + if err != nil { + return err + } + client.pod = pod + return nil +} + +// watchPod starts a watch on the pod. +func (client *PodClient) watchPod() error { + podWatch, err := client.podApi.Watch( + context.Background(), + metav1.ListOptions{ + Watch: true, + ResourceVersion: "0", + FieldSelector: fmt.Sprintf("metadata.name=%s", os.Getenv("FDB_POD_NAME")), + }, + ) + if err != nil { + return err + } + results := podWatch.ResultChan() + go func() { + for event := range results { + if event.Type == watch.Modified { + pod, valid := event.Object.(*corev1.Pod) + if !valid { + log.Printf("Error getting pod information from watch: %v", event) + } + client.processPodUpdate(pod) + } + } + }() + + return nil +} + +// processPodUpdate handles an update for a pod. 
+func (client *PodClient) processPodUpdate(pod *corev1.Pod) { + client.pod = pod + if pod.Annotations == nil { + return + } + annotation := client.pod.Annotations[OutdatedConfigMapAnnotation] + if annotation == "" { + return + } + timestamp, err := strconv.ParseInt(annotation, 10, 64) + if err != nil { + log.Printf("Error parsing annotation %s: %s", annotation, err) + return + } + + client.TimestampFeed <- timestamp +} diff --git a/fdbkubernetesmonitor/monitor.go b/fdbkubernetesmonitor/monitor.go index 3949a406819..a9a78e50960 100644 --- a/fdbkubernetesmonitor/monitor.go +++ b/fdbkubernetesmonitor/monitor.go @@ -52,6 +52,10 @@ type Monitor struct { // configuration. ActiveConfigurationBytes []byte + // LastConfigurationTime is the last time we successfully reloaded the + // configuration file. + LastConfigurationTime time.Time + // ProcessIDs stores the PIDs of the processes that are running. A PID of // zero will indicate that a process does not have a run loop. A PID of -1 // will indicate that a process has a run loop but is not currently running @@ -60,11 +64,26 @@ type Monitor struct { // Mutex defines a mutex around working with configuration. Mutex sync.Mutex + + // PodClient is a client for posting updates about this pod to + // Kubernetes. + PodClient *PodClient } // StartMonitor starts the monitor loop. 
func StartMonitor(configFile string, fdbserverPath string) { - monitor := &Monitor{ConfigFile: configFile, FDBServerPath: fdbserverPath} + podClient, err := CreatePodClient() + if err != nil { + panic(err) + } + + monitor := &Monitor{ + ConfigFile: configFile, + FDBServerPath: fdbserverPath, + PodClient: podClient, + } + + go func() { monitor.WatchPodTimestamps() }() monitor.Run() } @@ -107,6 +126,7 @@ func (monitor *Monitor) LoadConfiguration() { monitor.ActiveConfiguration = configuration monitor.ActiveConfigurationBytes = configurationBytes + monitor.LastConfigurationTime = time.Now() for processNumber := 1; processNumber <= configuration.ServerCount; processNumber++ { if monitor.ProcessesIDs[processNumber] == 0 { @@ -115,6 +135,11 @@ func (monitor *Monitor) LoadConfiguration() { go func() { monitor.RunProcess(tempNumber) }() } } + + err = monitor.PodClient.UpdateAnnotations(monitor) + if err != nil { + log.Printf("Error updating pod annotations: %s", err) + } } // RunProcess runs a loop to continually start and watch a process. @@ -241,3 +266,11 @@ func (monitor *Monitor) Run() { <-done } + +func (monitor *Monitor) WatchPodTimestamps() { + for timestamp := range monitor.PodClient.TimestampFeed { + if timestamp > monitor.LastConfigurationTime.Unix() { + monitor.LoadConfiguration() + } + } +} diff --git a/packaging/docker/kubernetes/statefulset.yaml b/packaging/docker/kubernetes/config.yaml similarity index 79% rename from packaging/docker/kubernetes/statefulset.yaml rename to packaging/docker/kubernetes/config.yaml index b8590dbc9e4..a47a7fe01e5 100644 --- a/packaging/docker/kubernetes/statefulset.yaml +++ b/packaging/docker/kubernetes/config.yaml @@ -23,19 +23,7 @@ # This is not a recommended way to run production clusters, but it can be useful # to test the image in development. # -# To start a cluster with this example, run the following steps: -# 1. Apply this file. -# 2. Wait for all pods to start. -# 3. 
Generate a connection string, using the following bash command: -# ips=$(kubectl get pod -l app=fdb-kubernetes-example -o json | jq -j '[[.items|.[]|select(.status.podIP!="")]|limit(3;.[])|.status.podIP+":4501"]|join(",")'); echo test:test@$ips -# echo test:test@$ips -# 4. Update the ConfigMap below to have the results of that echo statement as -# the `fdb.cluster` entry, and change the `serverCount` field to `1`. -# 5. Apply the file again. -# 6. Watch the logs for the fdb-kubernetes-example pods to confirm that they -# have launched the fdbserver processes. -# 7. Exec into one of the pods, and run `fdbcli --exec "configure new double ssd"`. - +# For more information on using this file, see fdbkubernetesmonitor/doc.go apiVersion: apps/v1 kind: StatefulSet metadata: @@ -55,8 +43,17 @@ spec: spec: containers: - name: foundationdb - image: foundationdb/foundationdb-kubernetes:6.3.15 + image: foundationdb/foundationdb-kubernetes:latest + imagePullPolicy: IfNotPresent env: + - name: FDB_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: FDB_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: FDB_CLUSTER_FILE value: /var/fdb/data/fdb.cluster - name: FDB_PUBLIC_IP @@ -87,6 +84,7 @@ spec: mountPath: /var/fdb/dynamic-conf - name: data mountPath: /var/fdb/data + serviceAccountName: fdb-kubernetes-example volumes: - name: dynamic-conf configMap: @@ -109,7 +107,7 @@ data: fdb.cluster: "" config.json: | { - "serverCount": 0 + "serverCount": 0, "version": "6.3.15", "arguments": [ {"value": "--cluster_file"}, @@ -147,3 +145,36 @@ data: ]} ] } +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: fdb-kubernetes-example +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: fdb-kubernetes-example +rules: + - apiGroups: + - "" + resources: + - "pods" + verbs: + - "get" + - "watch" + - "update" +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: fdb-kubernetes-example 
+roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: fdb-kubernetes-example +subjects: +- kind: ServiceAccount + name: fdb-kubernetes-example + + From 7c36123cf81cd909ab4bc4e59b59d1d6319302ac Mon Sep 17 00:00:00 2001 From: John Brownlee Date: Sun, 22 Aug 2021 21:19:10 -0700 Subject: [PATCH 11/69] Adds structured logging in fdb-kubernetes-monitor. Adds a backoff window when restarting processes in fdb-kubernetes-monitor. --- fdbkubernetesmonitor/.testdata/fdb.cluster | 1 + fdbkubernetesmonitor/.testdata/test_env.sh | 7 + fdbkubernetesmonitor/README.md | 13 +- fdbkubernetesmonitor/config_test.go | 12 ++ fdbkubernetesmonitor/go.mod | 3 + fdbkubernetesmonitor/go.sum | 26 +++- fdbkubernetesmonitor/kubernetes.go | 9 +- fdbkubernetesmonitor/main.go | 16 ++- fdbkubernetesmonitor/monitor.go | 135 ++++++++++++------ packaging/docker/kubernetes/Dockerfile | 9 +- .../{config.yaml => test_config.yaml} | 22 ++- 11 files changed, 192 insertions(+), 61 deletions(-) create mode 100644 fdbkubernetesmonitor/.testdata/fdb.cluster create mode 100644 fdbkubernetesmonitor/.testdata/test_env.sh rename packaging/docker/kubernetes/{config.yaml => test_config.yaml} (94%) diff --git a/fdbkubernetesmonitor/.testdata/fdb.cluster b/fdbkubernetesmonitor/.testdata/fdb.cluster new file mode 100644 index 00000000000..4b364771734 --- /dev/null +++ b/fdbkubernetesmonitor/.testdata/fdb.cluster @@ -0,0 +1 @@ +test:test@127.0.0.1:4501 diff --git a/fdbkubernetesmonitor/.testdata/test_env.sh b/fdbkubernetesmonitor/.testdata/test_env.sh new file mode 100644 index 00000000000..13bfc76ec9a --- /dev/null +++ b/fdbkubernetesmonitor/.testdata/test_env.sh @@ -0,0 +1,7 @@ +export FDB_PUBLIC_IP=127.0.0.1 +export FDB_POD_IP=127.0.0.1 +export FDB_ZONE_ID=localhost +export FDB_MACHINE_ID=localhost +export FDB_INSTANCE_ID=storage-1 +export KUBERNETES_SERVICE_HOST=kubernetes.docker.internal +export KUBERNETES_SERVICE_PORT=6443 diff --git a/fdbkubernetesmonitor/README.md b/fdbkubernetesmonitor/README.md 
index 67e20401d85..103108c2181 100644 --- a/fdbkubernetesmonitor/README.md +++ b/fdbkubernetesmonitor/README.md @@ -5,21 +5,24 @@ To test this, run the following commands from the root of the FoundationDB repository: docker build -t foundationdb/foundationdb-kubernetes:latest --build-arg FDB_VERSION=6.3.15 --build-arg FDB_LIBRARY_VERSIONS="6.3.15 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile . - kubectl apply -f packaging/docker/kubernetes/config.yaml + kubectl apply -f packaging/docker/kubernetes/test_config.yaml # Wait for the pods to become ready ips=$(kubectl get pod -l app=fdb-kubernetes-example -o json | jq -j '[[.items|.[]|select(.status.podIP!="")]|limit(3;.[])|.status.podIP+":4501"]|join(",")') - cat packaging/docker/kubernetes/config.yaml | sed -e "s/fdb.cluster: \"\"/fdb.cluster: \"test:test@$ips\"/" -e "s/\"serverCount\": 0/\"serverCount\": 1/" | kubectl apply -f - + cat packaging/docker/kubernetes/test_config.yaml | sed -e "s/fdb.cluster: \"\"/fdb.cluster: \"test:test@$ips\"/" -e "s/\"serverCount\": 0/\"serverCount\": 1/" | kubectl apply -f - kubectl get pod -l app=fdb-kubernetes-example -o name | xargs -I {} kubectl annotate {} foundationdb.org/outdated-config-map-seen=$(date +%s) --overwrite # Watch the logs for the fdb-kubernetes-example pods to confirm that they have launched the fdbserver processes. 
kubectl exec -it sts/fdb-kubernetes-example -- fdbcli --exec "configure new double ssd" You can then make changes to the data in the config map and update the fdbserver processes: - kubectl apply -f packaging/docker/kubernetes/config.yaml + cat packaging/docker/kubernetes/test_config.yaml | sed -e "s/fdb.cluster: \"\"/fdb.cluster: \"test:test@$ips\"/" -e "s/\"serverCount\": 0/\"serverCount\": 1/" | kubectl apply -f - + + # You can apply an annotation to speed up the propagation of config kubectl get pod -l app=fdb-kubernetes-example -o name | xargs -I {} kubectl annotate {} foundationdb.org/outdated-config-map-seen=$(date +%s) --overwrite - # Watch the logs for the fdb-kubernetes-example pods to confirm that they have launched the fdbserver processes. + + # Watch the logs for the fdb-kubernetes-example pods to confirm that they have reloaded their configuration, and then do a bounce. kubectl exec -it sts/fdb-kubernetes-example -- fdbcli --exec "kill; kill all; status" Once you are done, you can tear down the example with the following command: - kubectl delete -f packaging/docker/kubernetes/config.yaml; kubectl delete pvc -l app=fdb-kubernetes-example + kubectl delete -f packaging/docker/kubernetes/test_config.yaml; kubectl delete pvc -l app=fdb-kubernetes-example diff --git a/fdbkubernetesmonitor/config_test.go b/fdbkubernetesmonitor/config_test.go index 9aca4043bfc..0820fe29ff4 100644 --- a/fdbkubernetesmonitor/config_test.go +++ b/fdbkubernetesmonitor/config_test.go @@ -24,6 +24,9 @@ import ( "os" "reflect" "testing" + + "github.com/go-logr/zapr" + "go.uber.org/zap" ) func loadConfigFromFile(path string) (*ProcessConfiguration, error) { @@ -99,4 +102,13 @@ func TestGeneratingArgumentForEnvironmentVariable(t *testing.T) { t.Fail() return } + + zapLogger, err := zap.NewDevelopment() + if err != nil { + panic(err) + } + + log := zapr.NewLogger(zapLogger) + log.Info("JPB test", "key", "value") + t.Fail() } diff --git a/fdbkubernetesmonitor/go.mod 
b/fdbkubernetesmonitor/go.mod index d44f296e555..fec774f3277 100644 --- a/fdbkubernetesmonitor/go.mod +++ b/fdbkubernetesmonitor/go.mod @@ -23,7 +23,10 @@ go 1.16 require ( github.com/fsnotify/fsnotify v1.5.0 + github.com/go-logr/logr v0.4.0 + github.com/go-logr/zapr v0.4.0 github.com/spf13/pflag v1.0.5 + go.uber.org/zap v1.19.0 k8s.io/api v0.20.2 k8s.io/apimachinery v0.20.2 k8s.io/client-go v0.20.2 diff --git a/fdbkubernetesmonitor/go.sum b/fdbkubernetesmonitor/go.sum index c0378c9592a..1c021ce5283 100644 --- a/fdbkubernetesmonitor/go.sum +++ b/fdbkubernetesmonitor/go.sum @@ -36,6 +36,8 @@ github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb0 github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY= +github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= +github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= @@ -62,8 +64,11 @@ github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= -github.com/go-logr/logr v0.2.0 
h1:QvGt2nLcHH0WK9orKa+ppBPAxREcH364nPUedEpK0TY= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= +github.com/go-logr/logr v0.4.0 h1:K7/B1jt6fIBQVd4Owv2MqGQClcgf0R266+7C/QjRcLc= +github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= +github.com/go-logr/zapr v0.4.0 h1:uc1uML3hRYL9/ZZPdgHS/n8Nzo+eaYL/Efxkkamf7OM= +github.com/go-logr/zapr v0.4.0/go.mod h1:tabnROwaDl0UNxkVeFRbY8bwB37GwRv0P8lg6aAiEnk= github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg= github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc= @@ -157,6 +162,8 @@ github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+ github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -170,12 +177,21 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.6.1 
h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= +go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/goleak v1.1.10 h1:z+mqJhf6ss6BSfSM671tgKyZBFPTTJM+HLxnhPC3wu0= +go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= +go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4= +go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= +go.uber.org/zap v1.19.0 h1:mZQZefskPPCMIBCSEH0v2/iUqqLrYtaeqwD6FUGUnFE= +go.uber.org/zap v1.19.0/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -205,6 +221,7 @@ golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHl golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod 
h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20200302205851-738671d3881b h1:Wh+f8QHJXR411sJR8/vRBTZ7YapZaRvUcLFFJhusH0k= golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= @@ -303,6 +320,7 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -317,6 +335,7 @@ golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapK golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb h1:iKlO7ROJc6SttHKlxzwGytRtBUqX4VARrNTgP2YLX5M= golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= 
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -388,8 +407,9 @@ gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/fdbkubernetesmonitor/kubernetes.go b/fdbkubernetesmonitor/kubernetes.go index 63e9af76b09..dab1f23911a 100644 --- a/fdbkubernetesmonitor/kubernetes.go +++ b/fdbkubernetesmonitor/kubernetes.go @@ -23,10 +23,10 @@ import ( "context" "encoding/json" "fmt" - "log" "os" "strconv" + "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -61,6 +61,9 @@ type PodClient struct { // TimestampFeed is a channel where the pod client will send updates with // the values from OutdatedConfigMapAnnotation. TimestampFeed chan int64 + + // Logger is the logger we use for this client. 
+ Logger logr.Logger } // CreatePodClient creates a new client for working with the pod object. @@ -155,7 +158,7 @@ func (client *PodClient) watchPod() error { if event.Type == watch.Modified { pod, valid := event.Object.(*corev1.Pod) if !valid { - log.Printf("Error getting pod information from watch: %v", event) + client.Logger.Error(nil, "Error getting pod information from watch", "event", event) } client.processPodUpdate(pod) } @@ -177,7 +180,7 @@ func (client *PodClient) processPodUpdate(pod *corev1.Pod) { } timestamp, err := strconv.ParseInt(annotation, 10, 64) if err != nil { - log.Printf("Error parsing annotation %s: %s", annotation, err) + client.Logger.Error(err, "Error parsing annotation", "key", OutdatedConfigMapAnnotation, "rawAnnotation", annotation, err) return } diff --git a/fdbkubernetesmonitor/main.go b/fdbkubernetesmonitor/main.go index 7f82af04f23..1cb922d2e2b 100644 --- a/fdbkubernetesmonitor/main.go +++ b/fdbkubernetesmonitor/main.go @@ -22,20 +22,34 @@ package main import ( "fmt" + "github.com/go-logr/zapr" "github.com/spf13/pflag" + "go.uber.org/zap" ) var ( inputDir string fdbserverPath string monitorConfFile string + logPath string ) func main() { pflag.StringVar(&fdbserverPath, "fdbserver-path", "/usr/bin/fdbserver", "Path to the fdbserver binary") pflag.StringVar(&inputDir, "input-dir", ".", "Directory containing input files") pflag.StringVar(&monitorConfFile, "input-monitor-conf", "config.json", "Name of the file in the input directory that contains the monitor configuration") + pflag.StringVar(&logPath, "log-path", "", "Name of a file to send logs to. Logs will be sent to stdout in addition the file you pass in this argument. 
If this is blank, logs will only by sent to stdout") pflag.Parse() - StartMonitor(fmt.Sprintf("%s/%s", inputDir, monitorConfFile), fdbserverPath) + zapConfig := zap.NewProductionConfig() + if logPath != "" { + zapConfig.OutputPaths = append(zapConfig.OutputPaths, logPath) + } + zapLogger, err := zapConfig.Build() + if err != nil { + panic(err) + } + + logger := zapr.NewLogger(zapLogger) + StartMonitor(logger, fmt.Sprintf("%s/%s", inputDir, monitorConfFile), fdbserverPath) } diff --git a/fdbkubernetesmonitor/monitor.go b/fdbkubernetesmonitor/monitor.go index a9a78e50960..99d8e9c6989 100644 --- a/fdbkubernetesmonitor/monitor.go +++ b/fdbkubernetesmonitor/monitor.go @@ -20,9 +20,9 @@ package main import ( + "bufio" "encoding/json" "io" - "log" "os" "os/exec" "os/signal" @@ -31,11 +31,14 @@ import ( "time" "github.com/fsnotify/fsnotify" + "github.com/go-logr/logr" ) // errorBackoffSeconds is the time to wait after a process fails before starting // another process. -const errorBackoffSeconds = 5 +// This delay will only be applied when there has been more than one failure +// within this time window. +const errorBackoffSeconds = 60 // Monitor provides the main monitor loop type Monitor struct { @@ -60,7 +63,7 @@ type Monitor struct { // zero will indicate that a process does not have a run loop. A PID of -1 // will indicate that a process has a run loop but is not currently running // the subprocess. - ProcessesIDs []int + ProcessIDs []int // Mutex defines a mutex around working with configuration. Mutex sync.Mutex @@ -68,10 +71,13 @@ type Monitor struct { // PodClient is a client for posting updates about this pod to // Kubernetes. PodClient *PodClient + + // Logger is the logger instance for this monitor. + Logger logr.Logger } // StartMonitor starts the monitor loop. 
-func StartMonitor(configFile string, fdbserverPath string) { +func StartMonitor(logger logr.Logger, configFile string, fdbserverPath string) { podClient, err := CreatePodClient() if err != nil { panic(err) @@ -81,6 +87,7 @@ func StartMonitor(configFile string, fdbserverPath string) { ConfigFile: configFile, FDBServerPath: fdbserverPath, PodClient: podClient, + Logger: logger, } go func() { monitor.WatchPodTimestamps() }() @@ -91,36 +98,36 @@ func StartMonitor(configFile string, fdbserverPath string) { func (monitor *Monitor) LoadConfiguration() { file, err := os.Open(monitor.ConfigFile) if err != nil { - log.Print(err.Error()) + monitor.Logger.Error(err, "Error reading monitor config file", "monitorConfigPath", monitor.ConfigFile) return } defer file.Close() configuration := &ProcessConfiguration{} configurationBytes, err := io.ReadAll(file) if err != nil { - log.Print(err.Error()) + monitor.Logger.Error(err, "Error reading monitor configuration", "monitorConfigPath", monitor.ConfigFile) } err = json.Unmarshal(configurationBytes, configuration) if err != nil { - log.Print(err) + monitor.Logger.Error(err, "Error parsing monitor configuration", "rawConfiguration", string(configurationBytes)) return } _, err = configuration.GenerateArguments(1, nil) if err != nil { - log.Print(err) + monitor.Logger.Error(err, "Error generating arguments for latest configuration", "configuration", configuration) return } - log.Printf("Received new configuration file") + monitor.Logger.Info("Received new configuration file", "configuration", configuration) monitor.Mutex.Lock() defer monitor.Mutex.Unlock() - if monitor.ProcessesIDs == nil { - monitor.ProcessesIDs = make([]int, configuration.ServerCount+1) + if monitor.ProcessIDs == nil { + monitor.ProcessIDs = make([]int, configuration.ServerCount+1) } else { - for len(monitor.ProcessesIDs) <= configuration.ServerCount { - monitor.ProcessesIDs = append(monitor.ProcessesIDs, 0) + for len(monitor.ProcessIDs) <= configuration.ServerCount { 
+ monitor.ProcessIDs = append(monitor.ProcessIDs, 0) } } @@ -129,8 +136,8 @@ func (monitor *Monitor) LoadConfiguration() { monitor.LastConfigurationTime = time.Now() for processNumber := 1; processNumber <= configuration.ServerCount; processNumber++ { - if monitor.ProcessesIDs[processNumber] == 0 { - monitor.ProcessesIDs[processNumber] = -1 + if monitor.ProcessIDs[processNumber] == 0 { + monitor.ProcessIDs[processNumber] = -1 tempNumber := processNumber go func() { monitor.RunProcess(tempNumber) }() } @@ -138,18 +145,20 @@ func (monitor *Monitor) LoadConfiguration() { err = monitor.PodClient.UpdateAnnotations(monitor) if err != nil { - log.Printf("Error updating pod annotations: %s", err) + monitor.Logger.Error(err, "Error updating pod annotations") } } // RunProcess runs a loop to continually start and watch a process. func (monitor *Monitor) RunProcess(processNumber int) { - log.Printf("Starting run loop for subprocess %d", processNumber) + pid := 0 + logger := monitor.Logger.WithValues("processNumber", processNumber, "area", "RunProcess") + logger.Info("Starting run loop") for { monitor.Mutex.Lock() if monitor.ActiveConfiguration.ServerCount < processNumber { - log.Printf("Terminating run loop for subprocess %d", processNumber) - monitor.ProcessesIDs[processNumber] = 0 + logger.Info("Terminating run loop") + monitor.ProcessIDs[processNumber] = 0 monitor.Mutex.Unlock() return } @@ -158,42 +167,85 @@ func (monitor *Monitor) RunProcess(processNumber int) { arguments, err := monitor.ActiveConfiguration.GenerateArguments(processNumber, nil) arguments = append([]string{monitor.FDBServerPath}, arguments...) 
if err != nil { - log.Print(err) + logger.Error(err, "Error generating arguments for subprocess", "configuration", monitor.ActiveConfiguration) time.Sleep(errorBackoffSeconds * time.Second) } cmd := exec.Cmd{ - Path: arguments[0], - Args: arguments, - Stdout: os.Stdout, - Stderr: os.Stderr, + Path: arguments[0], + Args: arguments, + } + + logger.Info("Starting subprocess", "arguments", arguments) + + stdout, err := cmd.StdoutPipe() + if err != nil { + logger.Error(err, "Error getting stdout from subprocess") + } + + stderr, err := cmd.StderrPipe() + if err != nil { + logger.Error(err, "Error getting stderr from subprocess") } - log.Printf("Starting subprocess #%d: %v", processNumber, arguments) err = cmd.Start() if err != nil { - log.Printf("Error from subprocess %d: %s", processNumber, err.Error()) - log.Printf("Subprocess #%d will restart in %d seconds", processNumber, errorBackoffSeconds) + logger.Error(err, "Error starting subprocess") time.Sleep(errorBackoffSeconds * time.Second) continue } + if cmd.Process != nil { + pid = cmd.Process.Pid + } else { + logger.Error(nil, "No Process information availale for subprocess") + } + + startTime := time.Now() + logger.Info("Subprocess started", "PID", pid) + monitor.Mutex.Lock() - monitor.ProcessesIDs[processNumber] = cmd.Process.Pid + monitor.ProcessIDs[processNumber] = pid monitor.Mutex.Unlock() - err = cmd.Wait() - log.Printf("Subprocess #%d terminated", processNumber) + if stdout != nil { + stdoutScanner := bufio.NewScanner(stdout) + go func() { + for stdoutScanner.Scan() { + logger.Info("Subprocess output", "msg", stdoutScanner.Text(), "PID", pid) + } + }() + } + + if stderr != nil { + stderrScanner := bufio.NewScanner(stderr) + go func() { + for stderrScanner.Scan() { + logger.Error(nil, "Subprocess error log", "msg", stderrScanner.Text(), "PID", pid) + } + }() + } + err = cmd.Wait() if err != nil { - log.Printf("Error from subprocess #%d: %s", processNumber, err.Error()) + logger.Error(err, "Error from 
subprocess", "PID", pid) + } + exitCode := -1 + if cmd.ProcessState != nil { + exitCode = cmd.ProcessState.ExitCode() } + logger.Info("Subprocess terminated", "exitCode", exitCode, "PID", pid) + + endTime := time.Now() monitor.Mutex.Lock() - monitor.ProcessesIDs[processNumber] = -1 + monitor.ProcessIDs[processNumber] = -1 monitor.Mutex.Unlock() - log.Printf("Subprocess #%d will restart in %d seconds", processNumber, errorBackoffSeconds) - time.Sleep(errorBackoffSeconds * time.Second) + processDuration := endTime.Sub(startTime) + if processDuration.Seconds() < errorBackoffSeconds { + logger.Info("Backing off from restarting subprocess", "backOffTimeSeconds", errorBackoffSeconds, "lastExecutionDurationSeconds", processDuration) + time.Sleep(errorBackoffSeconds * time.Second) + } } } @@ -205,7 +257,7 @@ func (monitor *Monitor) WatchConfiguration(watcher *fsnotify.Watcher) { if !ok { return } - log.Printf("Detected event on monitor conf file: %v", event) + monitor.Logger.Info("Detected event on monitor conf file", "event", event) if event.Op&fsnotify.Write == fsnotify.Write || event.Op&fsnotify.Create == fsnotify.Create { monitor.LoadConfiguration() } else if event.Op&fsnotify.Remove == fsnotify.Remove { @@ -219,7 +271,7 @@ func (monitor *Monitor) WatchConfiguration(watcher *fsnotify.Watcher) { if !ok { return } - log.Print(err) + monitor.Logger.Error(err, "Error watching for file system events") } } } @@ -232,18 +284,19 @@ func (monitor *Monitor) Run() { go func() { latestSignal := <-signals - log.Printf("Received signal %v", latestSignal) - for processNumber, processID := range monitor.ProcessesIDs { + monitor.Logger.Info("Received system signal", "signal", latestSignal) + for processNumber, processID := range monitor.ProcessIDs { if processID > 0 { + subprocessLogger := monitor.Logger.WithValues("processNumber", processNumber, "PID", processID) process, err := os.FindProcess(processID) if err != nil { - log.Printf("Error finding subprocess #%d (PID %d): %s", 
processNumber, processID, err.Error()) + subprocessLogger.Error(err, "Error finding subprocess") continue } - log.Printf("Sending signal %v to subprocess #%d (PID %d)", latestSignal, processNumber, processID) + subprocessLogger.Info("Sending signal to subprocess", "signal", latestSignal) err = process.Signal(latestSignal) if err != nil { - log.Printf("Error signaling subprocess #%d (PID %d): %s", processNumber, processID, err.Error()) + subprocessLogger.Error(err, "Error signaling subprocess") continue } } diff --git a/packaging/docker/kubernetes/Dockerfile b/packaging/docker/kubernetes/Dockerfile index 6d8feab19c7..bee0369a29a 100644 --- a/packaging/docker/kubernetes/Dockerfile +++ b/packaging/docker/kubernetes/Dockerfile @@ -69,8 +69,15 @@ RUN rm -rf /mnt/website && rm -r /var/fdb/tmp # Install the kubernetes monitor binary COPY --from=go-build /fdb-kubernetes-monitor /usr/bin/ -VOLUME /var/fdb/data +# Set up a non-root user + +RUN groupadd --gid 4059 fdb && \ + useradd --gid 4059 --uid 4059 --no-create-home --shell /bin/bash fdb && \ + chown -R fdb:fdb /var/fdb # Runtime Configuration Options +USER fdb +WORKDIR /var/fdb ENTRYPOINT ["/usr/bin/fdb-kubernetes-monitor"] +VOLUME /var/fdb/data diff --git a/packaging/docker/kubernetes/config.yaml b/packaging/docker/kubernetes/test_config.yaml similarity index 94% rename from packaging/docker/kubernetes/config.yaml rename to packaging/docker/kubernetes/test_config.yaml index a47a7fe01e5..2034f6282df 100644 --- a/packaging/docker/kubernetes/config.yaml +++ b/packaging/docker/kubernetes/test_config.yaml @@ -23,7 +23,7 @@ # This is not a recommended way to run production clusters, but it can be useful # to test the image in development. 
# -# For more information on using this file, see fdbkubernetesmonitor/doc.go +# For more information on using this file, see fdbkubernetesmonitor/README.md apiVersion: apps/v1 kind: StatefulSet metadata: @@ -45,6 +45,11 @@ spec: - name: foundationdb image: foundationdb/foundationdb-kubernetes:latest imagePullPolicy: IfNotPresent + args: + - --input-dir + - /var/fdb/dynamic-conf + - --log-path + - /var/fdb/logs/monitor.log env: - name: FDB_POD_NAME valueFrom: @@ -76,19 +81,20 @@ spec: valueFrom: fieldRef: fieldPath: metadata.name - args: - - --input-dir - - /var/fdb/dynamic-conf volumeMounts: - name: dynamic-conf mountPath: /var/fdb/dynamic-conf - name: data mountPath: /var/fdb/data + - name: logs + mountPath: /var/fdb/logs serviceAccountName: fdb-kubernetes-example volumes: - name: dynamic-conf configMap: name: fdb-kubernetes-example-config + - name: logs + emptyDir: {} volumeClaimTemplates: - metadata: name: data @@ -142,7 +148,11 @@ data: {"type": "Environment", "source": "FDB_INSTANCE_ID"}, {"value": "-"}, {"type": "ProcessNumber"} - ]} + ]}, + {"value": "--logdir"}, + {"value": "/var/fdb/logs"}, + {"value": "--trace_format"}, + {"value": "json"} ] } --- @@ -176,5 +186,3 @@ roleRef: subjects: - kind: ServiceAccount name: fdb-kubernetes-example - - From c7858d24410073a8b0bd00add04d7889b7da85ed Mon Sep 17 00:00:00 2001 From: John Brownlee Date: Mon, 23 Aug 2021 00:31:18 -0700 Subject: [PATCH 12/69] Add the init and sidecar modes for fdb-kubernetes-monitor. Add support for using a special binary path during upgrades in fdb-kubernetes-monitor. 
--- fdbkubernetesmonitor/README.md | 3 +- fdbkubernetesmonitor/config.go | 12 +- fdbkubernetesmonitor/config_test.go | 39 +++++-- fdbkubernetesmonitor/copy.go | 100 +++++++++++++++++ fdbkubernetesmonitor/main.go | 110 ++++++++++++++++++- fdbkubernetesmonitor/monitor.go | 20 +++- packaging/docker/kubernetes/Dockerfile | 7 +- packaging/docker/kubernetes/test_config.yaml | 99 ++++++++++++++++- 8 files changed, 363 insertions(+), 27 deletions(-) create mode 100644 fdbkubernetesmonitor/copy.go diff --git a/fdbkubernetesmonitor/README.md b/fdbkubernetesmonitor/README.md index 103108c2181..95a54866b68 100644 --- a/fdbkubernetesmonitor/README.md +++ b/fdbkubernetesmonitor/README.md @@ -4,7 +4,8 @@ This package provides a launcher program for running FoundationDB in Kubernetes. To test this, run the following commands from the root of the FoundationDB repository: - docker build -t foundationdb/foundationdb-kubernetes:latest --build-arg FDB_VERSION=6.3.15 --build-arg FDB_LIBRARY_VERSIONS="6.3.15 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile . + docker build -t foundationdb/foundationdb-kubernetes:latest --build-arg FDB_VERSION=6.3.13 --build-arg FDB_LIBRARY_VERSIONS="6.3.13 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile . + docker build -t foundationdb/foundationdb-kubernetes:latest-sidecar --build-arg FDB_VERSION=6.3.15 --build-arg FDB_LIBRARY_VERSIONS="6.3.15 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile . 
kubectl apply -f packaging/docker/kubernetes/test_config.yaml # Wait for the pods to become ready ips=$(kubectl get pod -l app=fdb-kubernetes-example -o json | jq -j '[[.items|.[]|select(.status.podIP!="")]|limit(3;.[])|.status.podIP+":4501"]|join(",")') diff --git a/fdbkubernetesmonitor/config.go b/fdbkubernetesmonitor/config.go index 2c89826cb2d..0899a145d9a 100644 --- a/fdbkubernetesmonitor/config.go +++ b/fdbkubernetesmonitor/config.go @@ -33,6 +33,9 @@ type ProcessConfiguration struct { // ServerCount defines the number of processes to start. ServerCount int `json:"serverCount,omitempty"` + // BinaryPath provides the path to the binary to launch. + BinaryPath string `json:"-"` + // Arguments provides the arugments to the process. Arguments []Argument `json:"arguments,omitempty"` } @@ -125,13 +128,16 @@ func (argument Argument) GenerateArgument(processNumber int, env map[string]stri // GenerateArguments intreprets the arguments in the process configuration and // generates a command invocation. 
func (configuration *ProcessConfiguration) GenerateArguments(processNumber int, env map[string]string) ([]string, error) { - results := make([]string, len(configuration.Arguments)) - for indexOfArgument, argument := range configuration.Arguments { + results := make([]string, 0, len(configuration.Arguments)+1) + if configuration.BinaryPath != "" { + results = append(results, configuration.BinaryPath) + } + for _, argument := range configuration.Arguments { result, err := argument.GenerateArgument(processNumber, env) if err != nil { return nil, err } - results[indexOfArgument] = result + results = append(results, result) } return results, nil } diff --git a/fdbkubernetesmonitor/config_test.go b/fdbkubernetesmonitor/config_test.go index 0820fe29ff4..d0ea625807d 100644 --- a/fdbkubernetesmonitor/config_test.go +++ b/fdbkubernetesmonitor/config_test.go @@ -24,9 +24,6 @@ import ( "os" "reflect" "testing" - - "github.com/go-logr/zapr" - "go.uber.org/zap" ) func loadConfigFromFile(path string) (*ProcessConfiguration, error) { @@ -74,6 +71,33 @@ func TestGeneratingArgumentsForDefaultConfig(t *testing.T) { t.Logf("Expected arguments %v, but got arguments %v", expectedArguments, arguments) t.Fail() } + + config.BinaryPath = "/usr/bin/fdbserver" + + arguments, err = config.GenerateArguments(1, map[string]string{ + "FDB_PUBLIC_IP": "10.0.0.1", + "FDB_POD_IP": "192.168.0.1", + "FDB_ZONE_ID": "zone1", + "FDB_INSTANCE_ID": "storage-1", + }) + if err != nil { + t.Error(err) + return + } + + expectedArguments = []string{ + "/usr/bin/fdbserver", + "--cluster_file", ".testdata/fdb.cluster", + "--public_address", "10.0.0.1:4501", "--listen_address", "192.168.0.1:4501", + "--datadir", ".testdata/data/1", "--class", "storage", + "--locality_zoneid", "zone1", "--locality_instance-id", "storage-1", + "--locality_process-id", "storage-1-1", + } + + if !reflect.DeepEqual(arguments, expectedArguments) { + t.Logf("Expected arguments %v, but got arguments %v", expectedArguments, arguments) + 
t.Fail() + } } func TestGeneratingArgumentForEnvironmentVariable(t *testing.T) { @@ -102,13 +126,4 @@ func TestGeneratingArgumentForEnvironmentVariable(t *testing.T) { t.Fail() return } - - zapLogger, err := zap.NewDevelopment() - if err != nil { - panic(err) - } - - log := zapr.NewLogger(zapLogger) - log.Info("JPB test", "key", "value") - t.Fail() } diff --git a/fdbkubernetesmonitor/copy.go b/fdbkubernetesmonitor/copy.go new file mode 100644 index 00000000000..80074bc0f13 --- /dev/null +++ b/fdbkubernetesmonitor/copy.go @@ -0,0 +1,100 @@ +// copy.go +// +// This source file is part of the FoundationDB open source project +// +// Copyright 2021 Apple Inc. and the FoundationDB project authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package main + +import ( + "fmt" + "io" + "os" + "path" + + "github.com/go-logr/logr" +) + +const ( + bufferSize = 1024 +) + +// copyFile copies a file into the output directory. 
+func copyFile(logger logr.Logger, inputPath string, outputPath string, required bool) error { + logger.Info("Copying file", "inputPath", inputPath, "outputPath", outputPath) + inputFile, err := os.Open(inputPath) + if err != nil { + logger.Error(err, "Error opening file", "path", inputPath) + return err + } + defer inputFile.Close() + + inputInfo, err := inputFile.Stat() + if err != nil { + logger.Error(err, "Error getting stats for file", "path", inputPath) + return err + } + + if required && inputInfo.Size() == 0 { + return fmt.Errorf("File %s is empty", inputPath) + } + + outputFile, err := os.OpenFile(outputPath, os.O_CREATE|os.O_WRONLY, inputInfo.Mode()) + if err != nil { + return err + } + defer outputFile.Close() + + var buffer = make([]byte, bufferSize) + for { + readLength, readError := inputFile.Read(buffer) + if readError == io.EOF { + break + } + if readError != nil { + logger.Error(readError, "Error reading file", "path", inputPath) + return readError + } + + _, writeError := outputFile.Write(buffer[:readLength]) + if writeError != nil { + logger.Error(writeError, "Error writing file", "path", outputPath) + return writeError + } + } + return nil +} + +// CopyFiles copies a list of files into the output directory. 
+func CopyFiles(logger logr.Logger, outputDir string, copyDetails map[string]string, requiredCopies map[string]bool) error { + for inputPath, outputSubpath := range copyDetails { + if outputSubpath == "" { + outputSubpath = path.Base(inputPath) + } + outputPath := path.Join(outputDir, outputSubpath) + err := os.MkdirAll(path.Dir(outputPath), os.ModeDir|os.ModePerm) + if err != nil { + return err + } + + required := requiredCopies[inputPath] + err = copyFile(logger, inputPath, outputPath, required) + if err != nil { + return err + } + } + return nil +} diff --git a/fdbkubernetesmonitor/main.go b/fdbkubernetesmonitor/main.go index 1cb922d2e2b..48e11361fa1 100644 --- a/fdbkubernetesmonitor/main.go +++ b/fdbkubernetesmonitor/main.go @@ -21,6 +21,9 @@ package main import ( "fmt" + "os" + "path" + "strings" "github.com/go-logr/zapr" "github.com/spf13/pflag" @@ -28,17 +31,48 @@ import ( ) var ( - inputDir string - fdbserverPath string - monitorConfFile string - logPath string + inputDir string + fdbserverPath string + versionFilePath string + sharedBinaryDir string + monitorConfFile string + logPath string + executionModeString string + outputDir string + copyFiles []string + copyBinaries []string + binaryOutputDirectory string + copyLibraries []string + copyPrimaryLibrary string + requiredCopyFiles []string + mainContainerVersion string + currentContainerVersion string +) + +type executionMode string + +const ( + executionModeLauncher executionMode = "launcher" + executionModeInit executionMode = "init" + executionModeSidecar executionMode = "sidecar" ) func main() { + pflag.StringVar(&executionModeString, "mode", "launcher", "Execution mode. 
Valid options are launcher, sidecar, and init") pflag.StringVar(&fdbserverPath, "fdbserver-path", "/usr/bin/fdbserver", "Path to the fdbserver binary") pflag.StringVar(&inputDir, "input-dir", ".", "Directory containing input files") pflag.StringVar(&monitorConfFile, "input-monitor-conf", "config.json", "Name of the file in the input directory that contains the monitor configuration") pflag.StringVar(&logPath, "log-path", "", "Name of a file to send logs to. Logs will be sent to stdout in addition the file you pass in this argument. If this is blank, logs will only by sent to stdout") + pflag.StringVar(&outputDir, "output-dir", ".", "Directory to copy files into") + pflag.StringArrayVar(©Files, "copy-file", nil, "A list of files to copy") + pflag.StringArrayVar(©Binaries, "copy-binary", nil, "A list of binaries to copy from /usr/bin") + pflag.StringVar(&versionFilePath, "version-file", "/var/fdb/version", "Path to a file containing the current FDB version") + pflag.StringVar(&sharedBinaryDir, "shared-binary-dir", "/var/fdb/shared-binaries/bin", "A directory containing binaries that are copied from a sidecar process") + pflag.StringVar(&binaryOutputDirectory, "binary-output-dir", "", "A subdirectory within $(output-dir)/bin to store binaries in. This defaults to the value in /var/fdb/version") + pflag.StringArrayVar(©Libraries, "copy-library", nil, "A list of libraries to copy from /usr/lib/fdb/multiversion to $(output-dir)/lib/multiversion") + pflag.StringVar(©PrimaryLibrary, "copy-primary-library", "", "A library to copy from /usr/lib/fdb/multiversion to $(output-dir)/lib. This file will be renamed to libfdb_c.so") + pflag.StringArrayVar(&requiredCopyFiles, "require-not-empty", nil, "When copying this file, exit with an error if the file is empty") + pflag.StringVar(&mainContainerVersion, "main-container-version", "", "For sidecar mode, this specifies the version of the main container. 
If this is equal to the current container version, no files will be copied") pflag.Parse() zapConfig := zap.NewProductionConfig() @@ -50,6 +84,72 @@ func main() { panic(err) } + versionBytes, err := os.ReadFile(versionFilePath) + if err != nil { + panic(err) + } + currentContainerVersion = strings.TrimSpace(string(versionBytes)) + logger := zapr.NewLogger(zapLogger) - StartMonitor(logger, fmt.Sprintf("%s/%s", inputDir, monitorConfFile), fdbserverPath) + copyDetails, requiredCopies, err := getCopyDetails() + if err != nil { + logger.Error(err, "Error getting list of files to copy") + os.Exit(1) + } + + mode := executionMode(executionModeString) + if mode == executionModeLauncher { + StartMonitor(logger, fmt.Sprintf("%s/%s", inputDir, monitorConfFile), fdbserverPath) + } else if mode == executionModeInit { + err = CopyFiles(logger, outputDir, copyDetails, requiredCopies) + if err != nil { + logger.Error(err, "Error copying files") + os.Exit(1) + } + } else if mode == executionModeSidecar { + if mainContainerVersion != currentContainerVersion { + err = CopyFiles(logger, outputDir, copyDetails, requiredCopies) + if err != nil { + logger.Error(err, "Error copying files") + os.Exit(1) + } + done := make(chan bool) + <-done + } + } else { + logger.Error(nil, "Unknown execution mode", "mode", mode) + os.Exit(1) + } +} + +func getCopyDetails() (map[string]string, map[string]bool, error) { + copyDetails := make(map[string]string, len(copyFiles)+len(copyBinaries)) + + for _, filePath := range copyFiles { + copyDetails[path.Join(inputDir, filePath)] = "" + } + if copyBinaries != nil { + if binaryOutputDirectory == "" { + binaryOutputDirectory = currentContainerVersion + } + for _, copyBinary := range copyBinaries { + copyDetails[fmt.Sprintf("/usr/bin/%s", copyBinary)] = path.Join("bin", binaryOutputDirectory, copyBinary) + } + } + for _, library := range copyLibraries { + copyDetails[fmt.Sprintf("/usr/lib/fdb/multiversion/libfdb_c_%s.so", library)] = path.Join("lib", 
"multiversion", fmt.Sprintf("libfdb_c_%s.so", library)) + } + if copyPrimaryLibrary != "" { + copyDetails[fmt.Sprintf("/usr/lib/fdb/multiversion/libfdb_c_%s.so", copyPrimaryLibrary)] = path.Join("lib", "libfdb_c.so") + } + requiredCopyMap := make(map[string]bool, len(requiredCopyFiles)) + for _, filePath := range requiredCopyFiles { + fullFilePath := path.Join(inputDir, filePath) + _, present := copyDetails[fullFilePath] + if !present { + return nil, nil, fmt.Errorf("File %s is required, but is not in the --copy-file list", filePath) + } + requiredCopyMap[fullFilePath] = true + } + return copyDetails, requiredCopyMap, nil } diff --git a/fdbkubernetesmonitor/monitor.go b/fdbkubernetesmonitor/monitor.go index 99d8e9c6989..32d68d176c9 100644 --- a/fdbkubernetesmonitor/monitor.go +++ b/fdbkubernetesmonitor/monitor.go @@ -26,6 +26,7 @@ import ( "os" "os/exec" "os/signal" + "path" "sync" "syscall" "time" @@ -113,9 +114,25 @@ func (monitor *Monitor) LoadConfiguration() { return } + if currentContainerVersion == configuration.Version { + configuration.BinaryPath = monitor.FDBServerPath + } else { + configuration.BinaryPath = path.Join(sharedBinaryDir, configuration.Version, "fdbserver") + } + + binaryStat, err := os.Stat(configuration.BinaryPath) + if err != nil { + monitor.Logger.Error(err, "Error checking binary path for latest configuration", "configuration", configuration, "binaryPath", configuration.BinaryPath) + return + } + if binaryStat.Mode()&0o100 == 0 { + monitor.Logger.Error(nil, "New binary path is not executable", "configuration", configuration, "binaryPath", configuration.BinaryPath) + return + } + _, err = configuration.GenerateArguments(1, nil) if err != nil { - monitor.Logger.Error(err, "Error generating arguments for latest configuration", "configuration", configuration) + monitor.Logger.Error(err, "Error generating arguments for latest configuration", "configuration", configuration, "binaryPath", configuration.BinaryPath) return } @@ -165,7 +182,6 @@ 
func (monitor *Monitor) RunProcess(processNumber int) { monitor.Mutex.Unlock() arguments, err := monitor.ActiveConfiguration.GenerateArguments(processNumber, nil) - arguments = append([]string{monitor.FDBServerPath}, arguments...) if err != nil { logger.Error(err, "Error generating arguments for subprocess", "configuration", monitor.ActiveConfiguration) time.Sleep(errorBackoffSeconds * time.Second) diff --git a/packaging/docker/kubernetes/Dockerfile b/packaging/docker/kubernetes/Dockerfile index bee0369a29a..2f6d4ca026c 100644 --- a/packaging/docker/kubernetes/Dockerfile +++ b/packaging/docker/kubernetes/Dockerfile @@ -57,11 +57,12 @@ COPY packaging/docker/website /mnt/website/ RUN mkdir -p /var/fdb/logs && mkdir -p /var/fdb/tmp && \ curl $FDB_WEBSITE/downloads/$FDB_VERSION/linux/fdb_$FDB_VERSION.tar.gz | tar zxf - --strip-components=1 && \ chmod u+x fdbbackup fdbcli fdbdr fdbmonitor fdbrestore fdbserver backup_agent dr_agent && \ - mv fdbbackup fdbcli fdbdr fdbmonitor fdbrestore fdbserver backup_agent dr_agent /usr/bin + mv fdbbackup fdbcli fdbdr fdbmonitor fdbrestore fdbserver backup_agent dr_agent /usr/bin && \ + echo ${FDB_VERSION} > /var/fdb/version # Install additional FoundationDB Client Libraries -ADD packaging/docker/release/download_multiversion_libraries.bash /var/fdb/tmp -RUN bash /var/fdb/tmp/download_multiversion_libraries.bash $FDB_WEBSITE $FDB_LIBRARY_VERSIONS +RUN mkdir -p /usr/lib/fdb/multiversion && \ + for version in $FDB_LIBRARY_VERSIONS; do curl $FDB_WEBSITE/downloads/$version/linux/libfdb_c_$version.so -o /usr/lib/fdb/multiversion/libfdb_c_${version%.*}.so; done # Clean up temporary directories RUN rm -rf /mnt/website && rm -r /var/fdb/tmp diff --git a/packaging/docker/kubernetes/test_config.yaml b/packaging/docker/kubernetes/test_config.yaml index 2034f6282df..d96f883648c 100644 --- a/packaging/docker/kubernetes/test_config.yaml +++ b/packaging/docker/kubernetes/test_config.yaml @@ -84,10 +84,33 @@ spec: volumeMounts: - name: dynamic-conf 
mountPath: /var/fdb/dynamic-conf + - name: shared-binaries + mountPath: /var/fdb/shared-binaries - name: data mountPath: /var/fdb/data - name: logs mountPath: /var/fdb/logs + - name: foundationdb-sidecar + image: foundationdb/foundationdb-kubernetes:latest-sidecar + imagePullPolicy: IfNotPresent + args: + - --mode + - sidecar + - --main-container-version + - 6.3.13 + - --output-dir + - /var/fdb/shared-binaries + - --copy-binary + - fdbserver + - --copy-binary + - fdbcli + - --log-path + - /var/fdb/logs/sidecar.log + volumeMounts: + - name: shared-binaries + mountPath: /var/fdb/shared-binaries + - name: logs + mountPath: /var/fdb/logs serviceAccountName: fdb-kubernetes-example volumes: - name: dynamic-conf @@ -95,6 +118,8 @@ spec: name: fdb-kubernetes-example-config - name: logs emptyDir: {} + - name: shared-binaries + emptyDir: {} volumeClaimTemplates: - metadata: name: data @@ -114,7 +139,7 @@ data: config.json: | { "serverCount": 0, - "version": "6.3.15", + "version": "6.3.13", "arguments": [ {"value": "--cluster_file"}, {"value": "/var/fdb/data/fdb.cluster"}, @@ -186,3 +211,75 @@ roleRef: subjects: - kind: ServiceAccount name: fdb-kubernetes-example +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: fdb-kubernetes-example-client +spec: + replicas: 2 + revisionHistoryLimit: 1 + selector: + matchLabels: + app: fdb-kubernetes-example-client + template: + metadata: + labels: + app: fdb-kubernetes-example-client + name: fdb-kubernetes-example-client + spec: + volumes: + - name: config-map + configMap: + name: fdb-kubernetes-example-config + items: + - key: fdb.cluster + path: fdb.cluster + - name: dynamic-conf + emptyDir: {} + initContainers: + - name: foundationdb-kubernetes-init + image: foundationdb/foundationdb-kubernetes:latest + imagePullPolicy: IfNotPresent + args: + - "--mode" + - "init" + - "--input-dir" + - "/var/input-files" + - "--output-dir" + - "/var/output-files" + - "--copy-file" + - "fdb.cluster" + - "--require-not-empty" + - 
"fdb.cluster" + - "--copy-library" + - "6.1" + - "--copy-library" + - "6.2" + - "--copy-primary-library" + - "6.3" + - "--copy-binary" + - "fdbcli" + volumeMounts: + - name: config-map + mountPath: /var/input-files + - name: dynamic-conf + mountPath: /var/output-files + containers: + - name: client + image: foundationdb/foundationdb-sample-python-app:latest + imagePullPolicy: Always + env: + - name: FDB_CLUSTER_FILE + value: /var/dynamic-conf/fdb.cluster + - name: FDB_API_VERSION + value: "610" + - name: FDB_NETWORK_OPTION_TRACE_LOG_GROUP + value: fdb-kubernetes-example-client + - name: FDB_NETWORK_OPTION_EXTERNAL_CLIENT_DIRECTORY + value: /var/dynamic-conf/lib/multiversion + - name: LD_LIBRARY_PATH + value: /var/dynamic-conf/lib + volumeMounts: + - name: dynamic-conf + mountPath: /var/dynamic-conf From f8ec3cc27d5ca62a06e432aac63b4052b80285e7 Mon Sep 17 00:00:00 2001 From: John Brownlee Date: Mon, 23 Aug 2021 01:11:25 -0700 Subject: [PATCH 13/69] Add an option to load an additional file of environment variables in fdb-kubernetes-monitor. 
--- .../.testdata/default_config.json | 2 +- fdbkubernetesmonitor/config.go | 3 +- fdbkubernetesmonitor/main.go | 35 ++++++++++++++++++- fdbkubernetesmonitor/monitor.go | 21 +++++------ 4 files changed, 48 insertions(+), 13 deletions(-) diff --git a/fdbkubernetesmonitor/.testdata/default_config.json b/fdbkubernetesmonitor/.testdata/default_config.json index 86cb8361644..60d337c4c58 100644 --- a/fdbkubernetesmonitor/.testdata/default_config.json +++ b/fdbkubernetesmonitor/.testdata/default_config.json @@ -1,5 +1,5 @@ { - "version": "6.3.0", + "version": "6.3.15", "serverCount": 1, "arguments": [ {"value": "--cluster_file"}, diff --git a/fdbkubernetesmonitor/config.go b/fdbkubernetesmonitor/config.go index 0899a145d9a..ac388f10dd7 100644 --- a/fdbkubernetesmonitor/config.go +++ b/fdbkubernetesmonitor/config.go @@ -113,7 +113,8 @@ func (argument Argument) GenerateArgument(processNumber int, env map[string]stri var present bool if env != nil { value, present = env[argument.Source] - } else { + } + if !present { value, present = os.LookupEnv(argument.Source) } if !present { diff --git a/fdbkubernetesmonitor/main.go b/fdbkubernetesmonitor/main.go index 48e11361fa1..1237ffa81b1 100644 --- a/fdbkubernetesmonitor/main.go +++ b/fdbkubernetesmonitor/main.go @@ -20,11 +20,14 @@ package main import ( + "bufio" "fmt" "os" "path" + "regexp" "strings" + "github.com/go-logr/logr" "github.com/go-logr/zapr" "github.com/spf13/pflag" "go.uber.org/zap" @@ -47,6 +50,7 @@ var ( requiredCopyFiles []string mainContainerVersion string currentContainerVersion string + additionalEnvFile string ) type executionMode string @@ -73,6 +77,7 @@ func main() { pflag.StringVar(©PrimaryLibrary, "copy-primary-library", "", "A library to copy from /usr/lib/fdb/multiversion to $(output-dir)/lib. 
This file will be renamed to libfdb_c.so") pflag.StringArrayVar(&requiredCopyFiles, "require-not-empty", nil, "When copying this file, exit with an error if the file is empty") pflag.StringVar(&mainContainerVersion, "main-container-version", "", "For sidecar mode, this specifies the version of the main container. If this is equal to the current container version, no files will be copied") + pflag.StringVar(&additionalEnvFile, "additional-env-file", "", "A file with additional environment variables to use when interpreting the monitor configuration") pflag.Parse() zapConfig := zap.NewProductionConfig() @@ -99,7 +104,12 @@ func main() { mode := executionMode(executionModeString) if mode == executionModeLauncher { - StartMonitor(logger, fmt.Sprintf("%s/%s", inputDir, monitorConfFile), fdbserverPath) + customEnvironment, err := loadAdditionalEnvironment(logger) + if err != nil { + logger.Error(err, "Error loading additional environment") + os.Exit(1) + } + StartMonitor(logger, fmt.Sprintf("%s/%s", inputDir, monitorConfFile), customEnvironment) } else if mode == executionModeInit { err = CopyFiles(logger, outputDir, copyDetails, requiredCopies) if err != nil { @@ -153,3 +163,26 @@ func getCopyDetails() (map[string]string, map[string]bool, error) { } return copyDetails, requiredCopyMap, nil } + +func loadAdditionalEnvironment(logger logr.Logger) (map[string]string, error) { + var customEnvironment = make(map[string]string) + environmentPattern := regexp.MustCompile(`export ([A-Za-z0-9_]+)=([^\n]*)`) + if additionalEnvFile != "" { + file, err := os.Open(additionalEnvFile) + if err != nil { + return nil, err + } + + envScanner := bufio.NewScanner(file) + for envScanner.Scan() { + envLine := envScanner.Text() + matches := environmentPattern.FindStringSubmatch(envLine) + if matches == nil || envLine == "" { + logger.Error(nil, "Environment file contains line that we cannot parse", "line", envLine, "environmentPattern", environmentPattern) + continue + } + 
customEnvironment[matches[1]] = matches[2] + } + } + return customEnvironment, nil +} diff --git a/fdbkubernetesmonitor/monitor.go b/fdbkubernetesmonitor/monitor.go index 32d68d176c9..a9649d68e97 100644 --- a/fdbkubernetesmonitor/monitor.go +++ b/fdbkubernetesmonitor/monitor.go @@ -46,8 +46,9 @@ type Monitor struct { // ConfigFile defines the path to the config file to load. ConfigFile string - // FDBServerPath defines the path to the fdbserver binary. - FDBServerPath string + // CustomEnvironment defines the custom environment variables to use when + // interpreting the monitor configuration. + CustomEnvironment map[string]string // ActiveConfiguration defines the active process configuration. ActiveConfiguration *ProcessConfiguration @@ -78,17 +79,17 @@ type Monitor struct { } // StartMonitor starts the monitor loop. -func StartMonitor(logger logr.Logger, configFile string, fdbserverPath string) { +func StartMonitor(logger logr.Logger, configFile string, customEnvironment map[string]string) { podClient, err := CreatePodClient() if err != nil { panic(err) } monitor := &Monitor{ - ConfigFile: configFile, - FDBServerPath: fdbserverPath, - PodClient: podClient, - Logger: logger, + ConfigFile: configFile, + PodClient: podClient, + Logger: logger, + CustomEnvironment: customEnvironment, } go func() { monitor.WatchPodTimestamps() }() @@ -115,7 +116,7 @@ func (monitor *Monitor) LoadConfiguration() { } if currentContainerVersion == configuration.Version { - configuration.BinaryPath = monitor.FDBServerPath + configuration.BinaryPath = fdbserverPath } else { configuration.BinaryPath = path.Join(sharedBinaryDir, configuration.Version, "fdbserver") } @@ -130,7 +131,7 @@ func (monitor *Monitor) LoadConfiguration() { return } - _, err = configuration.GenerateArguments(1, nil) + _, err = configuration.GenerateArguments(1, monitor.CustomEnvironment) if err != nil { monitor.Logger.Error(err, "Error generating arguments for latest configuration", "configuration", configuration, 
"binaryPath", configuration.BinaryPath) return @@ -181,7 +182,7 @@ func (monitor *Monitor) RunProcess(processNumber int) { } monitor.Mutex.Unlock() - arguments, err := monitor.ActiveConfiguration.GenerateArguments(processNumber, nil) + arguments, err := monitor.ActiveConfiguration.GenerateArguments(processNumber, monitor.CustomEnvironment) if err != nil { logger.Error(err, "Error generating arguments for subprocess", "configuration", monitor.ActiveConfiguration) time.Sleep(errorBackoffSeconds * time.Second) From 1a5069a0471d6e14a247465a6bf4e93daa57d2e0 Mon Sep 17 00:00:00 2001 From: John Brownlee Date: Fri, 17 Sep 2021 16:26:05 -0700 Subject: [PATCH 14/69] Use an write-and-rename pattern when copying files for atomicity. Restructure the usage of the mutex in the monitor class. --- fdbkubernetesmonitor/README.md | 6 ++- fdbkubernetesmonitor/copy.go | 45 ++++++++--------- fdbkubernetesmonitor/monitor.go | 51 ++++++++++++++------ packaging/docker/kubernetes/test_config.yaml | 4 +- 4 files changed, 66 insertions(+), 40 deletions(-) diff --git a/fdbkubernetesmonitor/README.md b/fdbkubernetesmonitor/README.md index 95a54866b68..5f85436636d 100644 --- a/fdbkubernetesmonitor/README.md +++ b/fdbkubernetesmonitor/README.md @@ -4,8 +4,8 @@ This package provides a launcher program for running FoundationDB in Kubernetes. To test this, run the following commands from the root of the FoundationDB repository: - docker build -t foundationdb/foundationdb-kubernetes:latest --build-arg FDB_VERSION=6.3.13 --build-arg FDB_LIBRARY_VERSIONS="6.3.13 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile . - docker build -t foundationdb/foundationdb-kubernetes:latest-sidecar --build-arg FDB_VERSION=6.3.15 --build-arg FDB_LIBRARY_VERSIONS="6.3.15 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile . 
+ docker build -t foundationdb/foundationdb-kubernetes:6.3.13-local --build-arg FDB_VERSION=6.3.13 --build-arg FDB_LIBRARY_VERSIONS="6.3.13 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile . + docker build -t foundationdb/foundationdb-kubernetes:6.3.15-local --build-arg FDB_VERSION=6.3.15 --build-arg FDB_LIBRARY_VERSIONS="6.3.15 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile . kubectl apply -f packaging/docker/kubernetes/test_config.yaml # Wait for the pods to become ready ips=$(kubectl get pod -l app=fdb-kubernetes-example -o json | jq -j '[[.items|.[]|select(.status.podIP!="")]|limit(3;.[])|.status.podIP+":4501"]|join(",")') @@ -14,6 +14,8 @@ repository: # Watch the logs for the fdb-kubernetes-example pods to confirm that they have launched the fdbserver processes. kubectl exec -it sts/fdb-kubernetes-example -- fdbcli --exec "configure new double ssd" +This will set up a cluster in your Kubernetes environment using a statefulset, to provide a simple subset of what the Kubernetes operator does to set up the cluster. + You can then make changes to the data in the config map and update the fdbserver processes: cat packaging/docker/kubernetes/test_config.yaml | sed -e "s/fdb.cluster: \"\"/fdb.cluster: \"test:test@$ips\"/" -e "s/\"serverCount\": 0/\"serverCount\": 1/" | kubectl apply -f - diff --git a/fdbkubernetesmonitor/copy.go b/fdbkubernetesmonitor/copy.go index 80074bc0f13..bf91d4cede3 100644 --- a/fdbkubernetesmonitor/copy.go +++ b/fdbkubernetesmonitor/copy.go @@ -21,17 +21,13 @@ package main import ( "fmt" - "io" + "io/ioutil" "os" "path" "github.com/go-logr/logr" ) -const ( - bufferSize = 1024 -) - // copyFile copies a file into the output directory. 
func copyFile(logger logr.Logger, inputPath string, outputPath string, required bool) error { logger.Info("Copying file", "inputPath", inputPath, "outputPath", outputPath) @@ -52,29 +48,34 @@ func copyFile(logger logr.Logger, inputPath string, outputPath string, required return fmt.Errorf("File %s is empty", inputPath) } - outputFile, err := os.OpenFile(outputPath, os.O_CREATE|os.O_WRONLY, inputInfo.Mode()) + outputDir := path.Dir(outputPath) + + tempFile, err := ioutil.TempFile(outputDir, "") if err != nil { return err } - defer outputFile.Close() + defer tempFile.Close() - var buffer = make([]byte, bufferSize) - for { - readLength, readError := inputFile.Read(buffer) - if readError == io.EOF { - break - } - if readError != nil { - logger.Error(readError, "Error reading file", "path", inputPath) - return readError - } + _, err = tempFile.ReadFrom(inputFile) + if err != nil { + return err + } - _, writeError := outputFile.Write(buffer[:readLength]) - if writeError != nil { - logger.Error(writeError, "Error writing file", "path", outputPath) - return writeError - } + err = tempFile.Close() + if err != nil { + return err + } + + err = os.Chmod(tempFile.Name(), inputInfo.Mode()) + if err != nil { + return err } + + err = os.Rename(tempFile.Name(), outputPath) + if err != nil { + return err + } + return nil } diff --git a/fdbkubernetesmonitor/monitor.go b/fdbkubernetesmonitor/monitor.go index a9649d68e97..d366a6a2bd3 100644 --- a/fdbkubernetesmonitor/monitor.go +++ b/fdbkubernetesmonitor/monitor.go @@ -68,6 +68,8 @@ type Monitor struct { ProcessIDs []int // Mutex defines a mutex around working with configuration. + // This is used to synchronize access to local state like the active + // configuration and the process IDs from multiple goroutines. 
Mutex sync.Mutex // PodClient is a client for posting updates about this pod to @@ -137,9 +139,15 @@ func (monitor *Monitor) LoadConfiguration() { return } - monitor.Logger.Info("Received new configuration file", "configuration", configuration) + monitor.acceptConfiguration(configuration, configurationBytes) +} + +// acceptConfiguration is called when the monitor process parses and accepts +// a configuration from the local config file. +func (monitor *Monitor) acceptConfiguration(configuration *ProcessConfiguration, configurationBytes []byte) { monitor.Mutex.Lock() defer monitor.Mutex.Unlock() + monitor.Logger.Info("Received new configuration file", "configuration", configuration) if monitor.ProcessIDs == nil { monitor.ProcessIDs = make([]int, configuration.ServerCount+1) @@ -161,7 +169,7 @@ func (monitor *Monitor) LoadConfiguration() { } } - err = monitor.PodClient.UpdateAnnotations(monitor) + err := monitor.PodClient.UpdateAnnotations(monitor) if err != nil { monitor.Logger.Error(err, "Error updating pod annotations") } @@ -173,14 +181,9 @@ func (monitor *Monitor) RunProcess(processNumber int) { logger := monitor.Logger.WithValues("processNumber", processNumber, "area", "RunProcess") logger.Info("Starting run loop") for { - monitor.Mutex.Lock() - if monitor.ActiveConfiguration.ServerCount < processNumber { - logger.Info("Terminating run loop") - monitor.ProcessIDs[processNumber] = 0 - monitor.Mutex.Unlock() + if !monitor.checkProcessRequired(processNumber) { return } - monitor.Mutex.Unlock() arguments, err := monitor.ActiveConfiguration.GenerateArguments(processNumber, monitor.CustomEnvironment) if err != nil { @@ -220,9 +223,7 @@ func (monitor *Monitor) RunProcess(processNumber int) { startTime := time.Now() logger.Info("Subprocess started", "PID", pid) - monitor.Mutex.Lock() - monitor.ProcessIDs[processNumber] = pid - monitor.Mutex.Unlock() + monitor.updateProcessID(processNumber, pid) if stdout != nil { stdoutScanner := bufio.NewScanner(stdout) @@ -254,9 
+255,7 @@ func (monitor *Monitor) RunProcess(processNumber int) { logger.Info("Subprocess terminated", "exitCode", exitCode, "PID", pid) endTime := time.Now() - monitor.Mutex.Lock() - monitor.ProcessIDs[processNumber] = -1 - monitor.Mutex.Unlock() + monitor.updateProcessID(processNumber, -1) processDuration := endTime.Sub(startTime) if processDuration.Seconds() < errorBackoffSeconds { @@ -266,6 +265,30 @@ func (monitor *Monitor) RunProcess(processNumber int) { } } +// checkProcessRequired determines if the latest configuration requires that a +// process stay running. +// If the process is no longer desired, this will remove it from the process ID +// list and return false. If the process is still desired, this will return +// true. +func (monitor *Monitor) checkProcessRequired(processNumber int) bool { + monitor.Mutex.Lock() + defer monitor.Mutex.Unlock() + logger := monitor.Logger.WithValues("processNumber", processNumber, "area", "checkProcessRequired") + if monitor.ActiveConfiguration.ServerCount < processNumber { + logger.Info("Terminating run loop") + monitor.ProcessIDs[processNumber] = 0 + return false + } + return true +} + +// updateProcessID records a new Process ID from a newly launched process. +func (monitor *Monitor) updateProcessID(processNumber int, pid int) { + monitor.Mutex.Lock() + defer monitor.Mutex.Unlock() + monitor.ProcessIDs[processNumber] = pid +} + // WatchConfiguration detects changes to the monitor configuration file. 
func (monitor *Monitor) WatchConfiguration(watcher *fsnotify.Watcher) { for { diff --git a/packaging/docker/kubernetes/test_config.yaml b/packaging/docker/kubernetes/test_config.yaml index d96f883648c..1f43b7dd3e6 100644 --- a/packaging/docker/kubernetes/test_config.yaml +++ b/packaging/docker/kubernetes/test_config.yaml @@ -43,7 +43,7 @@ spec: spec: containers: - name: foundationdb - image: foundationdb/foundationdb-kubernetes:latest + image: foundationdb/foundationdb-kubernetes:6.3.13-local imagePullPolicy: IfNotPresent args: - --input-dir @@ -91,7 +91,7 @@ spec: - name: logs mountPath: /var/fdb/logs - name: foundationdb-sidecar - image: foundationdb/foundationdb-kubernetes:latest-sidecar + image: foundationdb/foundationdb-kubernetes:6.3.15-local imagePullPolicy: IfNotPresent args: - --mode From a6b903e7f8737294fef0b0e62a7818550f5b00fa Mon Sep 17 00:00:00 2001 From: John Brownlee Date: Tue, 21 Sep 2021 12:12:43 -0700 Subject: [PATCH 15/69] Move the new Kubernetes image to centos 7. --- fdbkubernetesmonitor/README.md | 40 +++++++++++++++----------- fdbkubernetesmonitor/main.go | 9 +++--- fdbkubernetesmonitor/monitor.go | 24 +++++++++++----- packaging/docker/kubernetes/Dockerfile | 34 +++++++++++----------- 4 files changed, 63 insertions(+), 44 deletions(-) diff --git a/fdbkubernetesmonitor/README.md b/fdbkubernetesmonitor/README.md index 5f85436636d..b8a68a03acc 100644 --- a/fdbkubernetesmonitor/README.md +++ b/fdbkubernetesmonitor/README.md @@ -4,28 +4,34 @@ This package provides a launcher program for running FoundationDB in Kubernetes. To test this, run the following commands from the root of the FoundationDB repository: - docker build -t foundationdb/foundationdb-kubernetes:6.3.13-local --build-arg FDB_VERSION=6.3.13 --build-arg FDB_LIBRARY_VERSIONS="6.3.13 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile . 
- docker build -t foundationdb/foundationdb-kubernetes:6.3.15-local --build-arg FDB_VERSION=6.3.15 --build-arg FDB_LIBRARY_VERSIONS="6.3.15 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile . - kubectl apply -f packaging/docker/kubernetes/test_config.yaml - # Wait for the pods to become ready - ips=$(kubectl get pod -l app=fdb-kubernetes-example -o json | jq -j '[[.items|.[]|select(.status.podIP!="")]|limit(3;.[])|.status.podIP+":4501"]|join(",")') - cat packaging/docker/kubernetes/test_config.yaml | sed -e "s/fdb.cluster: \"\"/fdb.cluster: \"test:test@$ips\"/" -e "s/\"serverCount\": 0/\"serverCount\": 1/" | kubectl apply -f - - kubectl get pod -l app=fdb-kubernetes-example -o name | xargs -I {} kubectl annotate {} foundationdb.org/outdated-config-map-seen=$(date +%s) --overwrite - # Watch the logs for the fdb-kubernetes-example pods to confirm that they have launched the fdbserver processes. - kubectl exec -it sts/fdb-kubernetes-example -- fdbcli --exec "configure new double ssd" - -This will set up a cluster in your Kubernetes environment using a statefulset, to provide a simple subset of what the Kubernetes operator does to set up the cluster. +```bash +docker build -t foundationdb/foundationdb-kubernetes:6.3.13-local --build-arg FDB_VERSION=6.3.13 --build-arg FDB_LIBRARY_VERSIONS="6.3.13 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile . +docker build -t foundationdb/foundationdb-kubernetes:6.3.15-local --build-arg FDB_VERSION=6.3.15 --build-arg FDB_LIBRARY_VERSIONS="6.3.15 6.2.30 6.1.13" -f packaging/docker/kubernetes/Dockerfile . 
+kubectl apply -f packaging/docker/kubernetes/test_config.yaml +# Wait for the pods to become ready +ips=$(kubectl get pod -l app=fdb-kubernetes-example -o json | jq -j '[[.items|.[]|select(.status.podIP!="")]|limit(3;.[])|.status.podIP+":4501"]|join(",")') +sed -e "s/fdb.cluster: \"\"/fdb.cluster: \"test:test@$ips\"/" -e "s/\"serverCount\": 0/\"serverCount\": 1/" packaging/docker/kubernetes/test_config.yaml | kubectl apply -f - +kubectl get pod -l app=fdb-kubernetes-example -o name | xargs -I {} kubectl annotate {} foundationdb.org/outdated-config-map-seen=$(date +%s) --overwrite +# Watch the logs for the fdb-kubernetes-example pods to confirm that they have launched the fdbserver processes. +kubectl exec -it sts/fdb-kubernetes-example -- fdbcli --exec "configure new double ssd" +``` + +This will set up a cluster in your Kubernetes environment using a statefulset, to provide a simple subset of what the Kubernetes operator does to set up the cluster. Note: This assumes that you are running Docker Desktop on your local machine, with Kubernetes configured through Docker Desktop. 
You can then make changes to the data in the config map and update the fdbserver processes: - cat packaging/docker/kubernetes/test_config.yaml | sed -e "s/fdb.cluster: \"\"/fdb.cluster: \"test:test@$ips\"/" -e "s/\"serverCount\": 0/\"serverCount\": 1/" | kubectl apply -f - +```bash +sed -e "s/fdb.cluster: \"\"/fdb.cluster: \"test:test@$ips\"/" -e "s/\"serverCount\": 0/\"serverCount\": 1/" packaging/docker/kubernetes/test_config.yaml | kubectl apply -f - - # You can apply an annotation to speed up the propagation of config - kubectl get pod -l app=fdb-kubernetes-example -o name | xargs -I {} kubectl annotate {} foundationdb.org/outdated-config-map-seen=$(date +%s) --overwrite +# You can apply an annotation to speed up the propagation of config +kubectl get pod -l app=fdb-kubernetes-example -o name | xargs -I {} kubectl annotate {} foundationdb.org/outdated-config-map-seen=$(date +%s) --overwrite - # Watch the logs for the fdb-kubernetes-example pods to confirm that they have reloaded their configuration, and then do a bounce. - kubectl exec -it sts/fdb-kubernetes-example -- fdbcli --exec "kill; kill all; status" +# Watch the logs for the fdb-kubernetes-example pods to confirm that they have reloaded their configuration, and then do a bounce. 
+kubectl exec -it sts/fdb-kubernetes-example -- fdbcli --exec "kill; kill all; status" +``` Once you are done, you can tear down the example with the following command: - kubectl delete -f packaging/docker/kubernetes/test_config.yaml; kubectl delete pvc -l app=fdb-kubernetes-example +```bash +kubectl delete -f packaging/docker/kubernetes/test_config.yaml; kubectl delete pvc -l app=fdb-kubernetes-example +``` diff --git a/fdbkubernetesmonitor/main.go b/fdbkubernetesmonitor/main.go index 1237ffa81b1..821ee13b500 100644 --- a/fdbkubernetesmonitor/main.go +++ b/fdbkubernetesmonitor/main.go @@ -103,20 +103,21 @@ func main() { } mode := executionMode(executionModeString) - if mode == executionModeLauncher { + switch mode { + case executionModeLauncher: customEnvironment, err := loadAdditionalEnvironment(logger) if err != nil { logger.Error(err, "Error loading additional environment") os.Exit(1) } StartMonitor(logger, fmt.Sprintf("%s/%s", inputDir, monitorConfFile), customEnvironment) - } else if mode == executionModeInit { + case executionModeInit: err = CopyFiles(logger, outputDir, copyDetails, requiredCopies) if err != nil { logger.Error(err, "Error copying files") os.Exit(1) } - } else if mode == executionModeSidecar { + case executionModeSidecar: if mainContainerVersion != currentContainerVersion { err = CopyFiles(logger, outputDir, copyDetails, requiredCopies) if err != nil { @@ -126,7 +127,7 @@ func main() { done := make(chan bool) <-done } - } else { + default: logger.Error(nil, "Unknown execution mode", "mode", mode) os.Exit(1) } diff --git a/fdbkubernetesmonitor/monitor.go b/fdbkubernetesmonitor/monitor.go index d366a6a2bd3..2db0a469c98 100644 --- a/fdbkubernetesmonitor/monitor.go +++ b/fdbkubernetesmonitor/monitor.go @@ -22,6 +22,7 @@ package main import ( "bufio" "encoding/json" + "fmt" "io" "os" "os/exec" @@ -123,13 +124,9 @@ func (monitor *Monitor) LoadConfiguration() { configuration.BinaryPath = path.Join(sharedBinaryDir, configuration.Version, "fdbserver") 
} - binaryStat, err := os.Stat(configuration.BinaryPath) + err = checkOwnerExecutable(configuration.BinaryPath) if err != nil { - monitor.Logger.Error(err, "Error checking binary path for latest configuration", "configuration", configuration, "binaryPath", configuration.BinaryPath) - return - } - if binaryStat.Mode()&0o100 == 0 { - monitor.Logger.Error(nil, "New binary path is not executable", "configuration", configuration, "binaryPath", configuration.BinaryPath) + monitor.Logger.Error(err, "Error with binary path for latest configuration", "configuration", configuration, "binaryPath", configuration.BinaryPath) return } @@ -142,6 +139,19 @@ func (monitor *Monitor) LoadConfiguration() { monitor.acceptConfiguration(configuration, configurationBytes) } +// checkOwnerExecutable validates that a path is a file that exists and is +// executable by its owner. +func checkOwnerExecutable(path string) error { + binaryStat, err := os.Stat(path) + if err != nil { + return err + } + if binaryStat.Mode()&0o100 == 0 { + return fmt.Errorf("Binary is not executable") + } + return nil +} + // acceptConfiguration is called when the monitor process parses and accepts // a configuration from the local config file. func (monitor *Monitor) acceptConfiguration(configuration *ProcessConfiguration, configurationBytes []byte) { @@ -217,7 +227,7 @@ func (monitor *Monitor) RunProcess(processNumber int) { if cmd.Process != nil { pid = cmd.Process.Pid } else { - logger.Error(nil, "No Process information availale for subprocess") + logger.Error(nil, "No Process information available for subprocess") } startTime := time.Now() diff --git a/packaging/docker/kubernetes/Dockerfile b/packaging/docker/kubernetes/Dockerfile index 2f6d4ca026c..58ef7e998ac 100644 --- a/packaging/docker/kubernetes/Dockerfile +++ b/packaging/docker/kubernetes/Dockerfile @@ -30,22 +30,24 @@ RUN go build -o /fdb-kubernetes-monitor ./... 
# Build the main image -FROM ubuntu:18.04 - -RUN apt-get update && \ - apt-get install -y curl>=7.58.0-2ubuntu3.6 \ - dnsutils>=1:9.11.3+dfsg-1ubuntu1.7 \ - lsof>=4.89+dfsg-0.1 \ - tcptraceroute>=1.5beta7+debian-4build1 \ - telnet>=0.17-41 \ - netcat>=1.10-41.1 \ - strace>=4.21-1ubuntu1 \ - tcpdump>=4.9.3-0ubuntu0.18.04.1 \ - less>=487-0.1 \ - vim>=2:8.0.1453-1ubuntu1.4 \ - net-tools>=1.60+git20161116.90da8a0-1ubuntu1 \ - jq>=1.5+dfsg-2 && \ - rm -rf /var/lib/apt/lists/* +FROM centos:7.9.2009 + +RUN yum install -y \ + binutils-2.27-44.base.el7 \ + bind-utils-9.11.4-26.P2.el7_9.7 \ + curl-7.29.0-59.el7_9.1 \ + less-458-9.el7 \ + lsof-4.87-6.el7 \ + nano-2.3.1-10.el7 \ + nmap-ncat-6.40-19.el7 \ + net-tools-2.0-0.25.20131004git.el7 \ + strace-4.24-6.el7 \ + tar-1.26-35.el7 \ + telnet-0.17-66.el7 \ + traceroute-2.0.22-2.el7 \ + tcpdump-4.9.2-4.el7_7.1 \ + vim-enhanced-7.4.629-8.el7_9 \ + && yum clean all ARG FDB_VERSION ARG FDB_LIBRARY_VERSIONS="${FDB_VERSION}" From ee292e2df7f2cf21595796cc7cb855dde3c53234 Mon Sep 17 00:00:00 2001 From: John Brownlee Date: Wed, 29 Sep 2021 15:56:56 -0700 Subject: [PATCH 16/69] Update based on PR feedback. 
--- fdbkubernetesmonitor/config.go | 3 ++- fdbkubernetesmonitor/copy.go | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fdbkubernetesmonitor/config.go b/fdbkubernetesmonitor/config.go index ac388f10dd7..2815dbd2356 100644 --- a/fdbkubernetesmonitor/config.go +++ b/fdbkubernetesmonitor/config.go @@ -22,6 +22,7 @@ package main import ( "fmt" "os" + "strconv" ) // ProcessConfiguration models the configuration for starting a FoundationDB @@ -107,7 +108,7 @@ func (argument Argument) GenerateArgument(processNumber int, env map[string]stri number = number * argument.Multiplier } number = number + argument.Offset - return fmt.Sprintf("%d", number), nil + return strconv.Itoa(number), nil case EnvironmentArgumentType: var value string var present bool diff --git a/fdbkubernetesmonitor/copy.go b/fdbkubernetesmonitor/copy.go index bf91d4cede3..2414a8cf7d8 100644 --- a/fdbkubernetesmonitor/copy.go +++ b/fdbkubernetesmonitor/copy.go @@ -21,7 +21,6 @@ package main import ( "fmt" - "io/ioutil" "os" "path" @@ -50,7 +49,7 @@ func copyFile(logger logr.Logger, inputPath string, outputPath string, required outputDir := path.Dir(outputPath) - tempFile, err := ioutil.TempFile(outputDir, "") + tempFile, err := os.CreateTemp(outputDir, "") if err != nil { return err } From 3305fe92f6d94e03c95c63e0d50d582ec5148875 Mon Sep 17 00:00:00 2001 From: Aaron Molitor Date: Thu, 28 Oct 2021 10:44:04 -0700 Subject: [PATCH 17/69] fix typo in script --- packaging/docker/ycsb/run_ycsb.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packaging/docker/ycsb/run_ycsb.sh b/packaging/docker/ycsb/run_ycsb.sh index 9bf1e54970c..96337cccf0c 100644 --- a/packaging/docker/ycsb/run_ycsb.sh +++ b/packaging/docker/ycsb/run_ycsb.sh @@ -14,5 +14,5 @@ echo "RUNNING YCSB" echo "YCSB FINISHED" echo "COPYING HISTOGRAMS TO S3" -aws s3 sync --sse aws:kms --exclude "*" --include "histogram.*" /tmp s3://${BUCKET}/ycsb_histgorams/${namespace}/${POD_NAME} +aws s3 sync --sse aws:kms 
--exclude "*" --include "histogram.*" /tmp s3://${BUCKET}/ycsb_histograms/${namespace}/${POD_NAME} echo "COPYING HISTOGRAMS TO S3 FINISHED" \ No newline at end of file From a4d784a3dc58a3556e58e81e99cf00c561110175 Mon Sep 17 00:00:00 2001 From: Leonidas Tsampros Date: Fri, 1 Oct 2021 11:17:39 +0100 Subject: [PATCH 18/69] packaging: apt doesn't support >= and fix tini installation --- packaging/docker/misc/tini-amd64.sha256sum | 1 - packaging/docker/release/Dockerfile | 36 ++++++++++++---------- 2 files changed, 19 insertions(+), 18 deletions(-) delete mode 100644 packaging/docker/misc/tini-amd64.sha256sum diff --git a/packaging/docker/misc/tini-amd64.sha256sum b/packaging/docker/misc/tini-amd64.sha256sum deleted file mode 100644 index 3cb1f9f6356..00000000000 --- a/packaging/docker/misc/tini-amd64.sha256sum +++ /dev/null @@ -1 +0,0 @@ -93dcc18adc78c65a028a84799ecf8ad40c936fdfc5f2a57b1acda5a8117fa82c tini-amd64 diff --git a/packaging/docker/release/Dockerfile b/packaging/docker/release/Dockerfile index 7df65e63c64..8bdfcd21098 100644 --- a/packaging/docker/release/Dockerfile +++ b/packaging/docker/release/Dockerfile @@ -20,28 +20,30 @@ FROM ubuntu:18.04 RUN apt-get update && \ - apt-get install -y curl>=7.58.0-2ubuntu3.6 \ - dnsutils>=1:9.11.3+dfsg-1ubuntu1.7 \ - lsof>=4.89+dfsg-0.1 \ - tcptraceroute>=1.5beta7+debian-4build1 \ - telnet>=0.17-41 \ - netcat>=1.10-41.1 \ - strace>=4.21-1ubuntu1 \ - tcpdump>=4.9.3-0ubuntu0.18.04.1 \ - less>=487-0.1 \ - vim>=2:8.0.1453-1ubuntu1.4 \ - net-tools>=1.60+git20161116.90da8a0-1ubuntu1 \ - jq>=1.5+dfsg-2 \ - openssl>=1.1.1-1ubuntu2.1~18.04.9 && \ + apt-get install -y curl \ + dnsutils \ + lsof \ + tcptraceroute \ + telnet \ + netcat \ + strace \ + tcpdump \ + less \ + vim \ + net-tools \ + jq \ + openssl && \ rm -rf /var/lib/apt/lists/* COPY misc/tini-amd64.sha256sum /tmp/ # Adding tini as PID 1 https://github.com/krallin/tini ARG TINI_VERSION=v0.19.0 -RUN curl -sLO 
https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-amd64 && \ - sha256sum -c /tmp/tini-amd64.sha256sum && \ - chmod +x tini-amd64 && \ - mv tini-amd64 /usr/bin/tini +RUN curl -o /tmp/tini-amd64 -sLO https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-amd64 && \ + curl -o /tmp/tini-amd64.sha256sum -sLO https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-amd64.sha256sum && \ + cd tmp && sha256sum -c /tmp/tini-amd64.sha256sum && \ + mv /tmp/tini-amd64 /usr/bin/tini && \ + chmod +x /usr/bin/tini + ARG FDB_VERSION ARG FDB_ADDITIONAL_VERSIONS="5.1.7" From 504f08a102a5df1927e7def5363e0d70c0fbbb04 Mon Sep 17 00:00:00 2001 From: Aaron Molitor Date: Thu, 28 Oct 2021 23:12:22 -0700 Subject: [PATCH 19/69] consolidate docker stuff, add perf and flamegraph parts to release image --- packaging/docker/Dockerfile.eks | 167 +++++++++++------- packaging/docker/misc/flamegraph.sha256sum | 2 - packaging/docker/release/Dockerfile | 94 +++++----- .../docker/release/create_cluster_file.bash | 52 ------ .../release/create_server_environment.bash | 43 ----- .../download_multiversion_libraries.bash | 31 ---- packaging/docker/release/fdb.bash | 51 +++++- packaging/docker/sidecar/Dockerfile | 58 +++--- packaging/docker/sidecar/entrypoint.bash | 2 +- packaging/docker/sidecar/requirements.txt | 1 - 10 files changed, 229 insertions(+), 272 deletions(-) delete mode 100644 packaging/docker/misc/flamegraph.sha256sum delete mode 100755 packaging/docker/release/create_cluster_file.bash delete mode 100755 packaging/docker/release/create_server_environment.bash delete mode 100755 packaging/docker/release/download_multiversion_libraries.bash diff --git a/packaging/docker/Dockerfile.eks b/packaging/docker/Dockerfile.eks index bc05b4d5a6d..b2aaf7b4f41 100644 --- a/packaging/docker/Dockerfile.eks +++ b/packaging/docker/Dockerfile.eks @@ -1,76 +1,108 @@ +# Dockerfile +# +# This source file is part of the FoundationDB open source project +# +# 
Copyright 2013-2021 Apple Inc. and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + FROM amazonlinux:2.0.20210326.0 as base RUN yum install -y \ - binutils \ - bind-utils \ - curl \ - gdb \ - jq \ - less \ - lsof \ - nc \ - net-tools \ - perf \ - perl \ - procps \ - python38 \ - python3-pip \ - strace \ - tar \ - traceroute \ - telnet \ - tcpdump \ - unzip \ - vim - -#todo: nload, iperf, numademo - -RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.0.30.zip" -o "awscliv2.zip" \ - && unzip awscliv2.zip && ./aws/install && rm -rf aws - -COPY misc/tini-amd64.sha256sum /tmp/ -COPY misc/flamegraph.sha256sum /tmp/ -# Adding tini as PID 1 https://github.com/krallin/tini -ARG TINI_VERSION=v0.19.0 -RUN curl -sLO https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-amd64 && \ - sha256sum -c /tmp/tini-amd64.sha256sum && \ - chmod +x tini-amd64 && \ - mv tini-amd64 /usr/bin/tini + binutils \ + bind-utils \ + curl \ + gdb \ + jq \ + less \ + lsof \ + nc \ + net-tools \ + perf \ + perl \ + procps \ + python38 \ + python3-pip \ + strace \ + tar \ + traceroute \ + telnet \ + tcpdump \ + unzip \ + vim && \ + yum clean all && \ + rm -rf /var/cache/yum + +# TODO: nload, iperf, numademo + +RUN curl https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.2.43.zip -o "awscliv2.zip" && \ + echo "9a8b3c4e7f72bbcc55e341dce3af42479f2730c225d6d265ee6f9162cfdebdfd awscliv2.zip" > awscliv2.txt && \ + sha256sum -c awscliv2.txt && \ + 
unzip -qq awscliv2.zip && \ + ./aws/install && \ + rm -rf /tmp/* -COPY sidecar/requirements.txt /tmp -RUN pip3 install -r /tmp/requirements.txt +# Adding tini as PID 1 https://github.com/krallin/tini +RUN curl -Ls https://github.com/krallin/tini/releases/download/v0.19.0/tini-amd64 -o tini && \ + echo "93dcc18adc78c65a028a84799ecf8ad40c936fdfc5f2a57b1acda5a8117fa82c tini" > tini-amd64.sha256sum && \ + sha256sum -c tini-amd64.sha256sum && \ + chmod +x tini && \ + mv tini /usr/bin/ && \ + rm -rf /tmp/* # Install flamegraph -RUN curl -sLO https://raw.githubusercontent.com/brendangregg/FlameGraph/90533539b75400297092f973163b8a7b067c66d3/stackcollapse-perf.pl && \ - curl -sLO https://raw.githubusercontent.com/brendangregg/FlameGraph/90533539b75400297092f973163b8a7b067c66d3/flamegraph.pl && \ - sha256sum -c /tmp/flamegraph.sha256sum && \ - chmod +x stackcollapse-perf.pl flamegraph.pl && \ - mv stackcollapse-perf.pl flamegraph.pl /usr/bin - -# TODO: Only used by sidecar -RUN groupadd --gid 4059 fdb && \ - useradd --gid 4059 --uid 4059 --no-create-home --shell /bin/bash fdb +RUN curl -LsO https://raw.githubusercontent.com/brendangregg/FlameGraph/90533539b75400297092f973163b8a7b067c66d3/stackcollapse-perf.pl && \ + curl -LsO https://raw.githubusercontent.com/brendangregg/FlameGraph/90533539b75400297092f973163b8a7b067c66d3/flamegraph.pl && \ + echo "a682ac46497d6fdbf9904d1e405d3aea3ad255fcb156f6b2b1a541324628dfc0 flamegraph.pl" > flamegraph.sha256sum && \ + echo "5bcfb73ff2c2ab7bf2ad2b851125064780b58c51cc602335ec0001bec92679a5 stackcollapse-perf.pl" >> flamegraph.sha256sum && \ + sha256sum -c flamegraph.sha256sum && \ + chmod +x stackcollapse-perf.pl flamegraph.pl && \ + mv stackcollapse-perf.pl flamegraph.pl /usr/bin ARG FDB_VERSION +ARG FDB_ADDITIONAL_VERSIONS="6.3.12 6.2.30 6.1.13 5.1.7" +ARG FDB_WEBSITE=https://www.foundationdb.org + +# Install additional FoundationDB Client Libraries +RUN mkdir -p /usr/lib/fdb/multiversion && \ + for version in $FDB_ADDITIONAL_VERSIONS; 
do \ + curl $FDB_WEBSITE/downloads/$version/linux/libfdb_c_$version.so -o /usr/lib/fdb/multiversion/libfdb_c_$version.so; \ + done && \ + rm -rf /mnt/website -# These are the output of the current build (not stripped) COPY --chown=root bin /usr/bin/ COPY --chown=root lib/libfdb_c.so /var/fdb/lib/ RUN mv /var/fdb/lib/libfdb_c.so /var/fdb/lib/libfdb_c_${FDB_VERSION%.*}.so RUN ln -s /var/fdb/lib/libfdb_c_${FDB_VERSION%.*}.so /var/fdb/lib/libfdb_c.so -# ------------------------------------------------- + +# =========================== END OF LAYER: base =============================== FROM base as foundationdb -COPY release/*.bash /var/fdb/scripts/ -RUN mkdir -p /var/fdb/logs +ARG FDB_VERSION + +WORKDIR / -# TODO: FDB_ADDITIONAL_VERSIONS -RUN mkdir -p /usr/lib/fdb/multiversion +# Set Up Runtime Scripts and Directories +ADD release/fdb.bash /var/fdb/scripts/ +RUN chmod a+x /var/fdb/scripts/fdb.bash + +RUN mkdir -p /var/fdb/logs VOLUME /var/fdb/data # Runtime Configuration Options + ENV FDB_PORT 4500 ENV FDB_CLUSTER_FILE /var/fdb/fdb.cluster ENV FDB_NETWORKING_MODE container @@ -79,27 +111,34 @@ ENV FDB_COORDINATOR_PORT 4500 ENV FDB_CLUSTER_FILE_CONTENTS "" ENV FDB_PROCESS_CLASS unset -ENTRYPOINT ["/usr/bin/tini", "-g", "--"] -CMD /var/fdb/scripts/fdb.bash +ENTRYPOINT ["/usr/bin/tini", "-g", "--", "/var/fdb/scripts/fdb.bash"] -# ------------------------------------------------- +# =========================== END OF LAYER: foundationdb =============================== FROM base AS sidecar +WORKDIR / + +ARG FDB_VERSION + +# Set Up Runtime Scripts and Directories + +ADD sidecar/entrypoint.bash sidecar/sidecar.py / +RUN chmod a+x /entrypoint.bash /sidecar.py +RUN pip3 install watchdog==0.9.0 -COPY sidecar/entrypoint.bash / -COPY sidecar/sidecar.py / -RUN chmod a+x /sidecar.py /entrypoint.bash +RUN echo ${FDB_VERSION} > /var/fdb/version && \ + mkdir -p /var/fdb/lib && \ + groupadd --gid 4059 fdb && \ + useradd --gid 4059 --uid 4059 --no-create-home --shell /bin/bash fdb VOLUME 
/var/input-files VOLUME /var/output-files -ARG FDB_VERSION +USER fdb -RUN echo ${FDB_VERSION} ; echo ${FDB_VERSION}> /var/fdb/version -RUN mkdir -p /var/fdb/lib +# Runtime Configuration Options ENV LISTEN_PORT 8080 -USER fdb - -ENTRYPOINT ["/usr/bin/tini", "-g", "--", "/entrypoint.bash"] \ No newline at end of file +ENTRYPOINT ["/usr/bin/tini", "-g", "--", "/entrypoint.bash"] +# =========================== END OF LAYER: sidecar =============================== diff --git a/packaging/docker/misc/flamegraph.sha256sum b/packaging/docker/misc/flamegraph.sha256sum deleted file mode 100644 index bb435ced8bd..00000000000 --- a/packaging/docker/misc/flamegraph.sha256sum +++ /dev/null @@ -1,2 +0,0 @@ -a682ac46497d6fdbf9904d1e405d3aea3ad255fcb156f6b2b1a541324628dfc0 flamegraph.pl -5bcfb73ff2c2ab7bf2ad2b851125064780b58c51cc602335ec0001bec92679a5 stackcollapse-perf.pl diff --git a/packaging/docker/release/Dockerfile b/packaging/docker/release/Dockerfile index 8bdfcd21098..fc58f64d494 100644 --- a/packaging/docker/release/Dockerfile +++ b/packaging/docker/release/Dockerfile @@ -20,63 +20,70 @@ FROM ubuntu:18.04 RUN apt-get update && \ - apt-get install -y curl \ - dnsutils \ - lsof \ - tcptraceroute \ - telnet \ - netcat \ - strace \ - tcpdump \ - less \ - vim \ - net-tools \ - jq \ - openssl && \ - rm -rf /var/lib/apt/lists/* - -COPY misc/tini-amd64.sha256sum /tmp/ + apt-get install -y \ + curl \ + dnsutils \ + jq \ + less \ + linux-tools-generic \ + lsof \ + net-tools \ + netcat \ + openssl \ + perl \ + strace \ + tcpdump \ + tcptraceroute \ + telnet \ + vim && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /tmp # Adding tini as PID 1 https://github.com/krallin/tini -ARG TINI_VERSION=v0.19.0 -RUN curl -o /tmp/tini-amd64 -sLO https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-amd64 && \ - curl -o /tmp/tini-amd64.sha256sum -sLO https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-amd64.sha256sum && \ - cd tmp && sha256sum -c 
/tmp/tini-amd64.sha256sum && \ - mv /tmp/tini-amd64 /usr/bin/tini && \ - chmod +x /usr/bin/tini - +RUN curl -Ls https://github.com/krallin/tini/releases/download/v0.19.0/tini-amd64 -o tini && \ + echo "93dcc18adc78c65a028a84799ecf8ad40c936fdfc5f2a57b1acda5a8117fa82c tini" > tini-amd64.sha256sum && \ + sha256sum -c tini-amd64.sha256sum && \ + chmod +x tini && \ + mv tini /usr/bin/ && \ + rm -rf /tmp/* + +# Install flamegraph +RUN curl -LsO https://raw.githubusercontent.com/brendangregg/FlameGraph/90533539b75400297092f973163b8a7b067c66d3/stackcollapse-perf.pl && \ + curl -LsO https://raw.githubusercontent.com/brendangregg/FlameGraph/90533539b75400297092f973163b8a7b067c66d3/flamegraph.pl && \ + echo "a682ac46497d6fdbf9904d1e405d3aea3ad255fcb156f6b2b1a541324628dfc0 flamegraph.pl" > flamegraph.sha256sum && \ + echo "5bcfb73ff2c2ab7bf2ad2b851125064780b58c51cc602335ec0001bec92679a5 stackcollapse-perf.pl" >> flamegraph.sha256sum && \ + sha256sum -c flamegraph.sha256sum && \ + chmod +x stackcollapse-perf.pl flamegraph.pl && \ + mv stackcollapse-perf.pl flamegraph.pl /usr/bin ARG FDB_VERSION ARG FDB_ADDITIONAL_VERSIONS="5.1.7" ARG FDB_WEBSITE=https://www.foundationdb.org -WORKDIR /var/fdb/tmp COPY website /mnt/website/ +RUN curl $FDB_WEBSITE/downloads/$FDB_VERSION/linux/libfdb_c_$FDB_VERSION.so -o /usr/lib/libfdb_c.so + # Install FoundationDB Binaries RUN curl $FDB_WEBSITE/downloads/$FDB_VERSION/linux/fdb_$FDB_VERSION.tar.gz | tar zxf - --strip-components=1 && \ - chmod u+x fdbbackup fdbcli fdbdr fdbmonitor fdbrestore fdbserver backup_agent dr_agent && \ - mv fdbbackup fdbcli fdbdr fdbmonitor fdbrestore fdbserver backup_agent dr_agent /usr/bin && \ - rm -r /var/fdb/tmp + chmod u+x fdbbackup fdbcli fdbdr fdbmonitor fdbrestore fdbserver backup_agent dr_agent && \ + mv fdbbackup fdbcli fdbdr fdbmonitor fdbrestore fdbserver backup_agent dr_agent /usr/bin && \ + rm -rf /tmp/* -WORKDIR / - -## TODO: Can unify everything above this line -## TODO: we can almost unify the additional 
client library download, -## but sidecar.py expects them in a different location, -## with a different naming convention. +# Install additional FoundationDB Client Libraries +RUN mkdir -p /usr/lib/fdb/multiversion && \ + for version in $FDB_ADDITIONAL_VERSIONS; do \ + curl $FDB_WEBSITE/downloads/$version/linux/libfdb_c_$version.so -o /usr/lib/fdb/multiversion/libfdb_c_$version.so; \ + done && \ + rm -rf /mnt/website -RUN curl $FDB_WEBSITE/downloads/$FDB_VERSION/linux/libfdb_c_$FDB_VERSION.so -o /usr/lib/libfdb_c.so +WORKDIR / # Set Up Runtime Scripts and Directories -ADD release/*.bash /var/fdb/scripts/ -RUN chmod a+x /var/fdb/scripts/*.bash - -# Install additional FoundationDB Client Libraries -RUN /var/fdb/scripts/download_multiversion_libraries.bash $FDB_WEBSITE $FDB_ADDITIONAL_VERSIONS - -RUN rm -rf /mnt/website +ADD release/fdb.bash /var/fdb/scripts/ +RUN chmod a+x /var/fdb/scripts/fdb.bash -RUN mkdir -p /var/fdb/logs +RUN mkdir -p /var/fdb/logs VOLUME /var/fdb/data @@ -90,5 +97,4 @@ ENV FDB_COORDINATOR_PORT 4500 ENV FDB_CLUSTER_FILE_CONTENTS "" ENV FDB_PROCESS_CLASS unset -ENTRYPOINT ["/usr/bin/tini", "-g", "--"] -CMD /var/fdb/scripts/fdb.bash +ENTRYPOINT ["/usr/bin/tini", "-g", "--", "/var/fdb/scripts/fdb.bash"] diff --git a/packaging/docker/release/create_cluster_file.bash b/packaging/docker/release/create_cluster_file.bash deleted file mode 100755 index c1bb959b8e6..00000000000 --- a/packaging/docker/release/create_cluster_file.bash +++ /dev/null @@ -1,52 +0,0 @@ -#! /bin/bash - -# -# create_cluster_file.bash -# -# This source file is part of the FoundationDB open source project -# -# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# This script creates a cluster file for a server or client. -# This takes the cluster file path from the FDB_CLUSTER_FILE -# environment variable, with a default of /etc/foundationdb/fdb.cluster -# -# The name of the coordinator must be defined in the FDB_COORDINATOR environment -# variable, and it must be a name that can be resolved through DNS. - -function create_cluster_file() { - FDB_CLUSTER_FILE=${FDB_CLUSTER_FILE:-/etc/foundationdb/fdb.cluster} - mkdir -p $(dirname $FDB_CLUSTER_FILE) - - if [[ -n "$FDB_CLUSTER_FILE_CONTENTS" ]]; then - echo "$FDB_CLUSTER_FILE_CONTENTS" > $FDB_CLUSTER_FILE - elif [[ -n $FDB_COORDINATOR ]]; then - coordinator_ip=$(dig +short $FDB_COORDINATOR) - if [[ -z "$coordinator_ip" ]]; then - echo "Failed to look up coordinator address for $FDB_COORDINATOR" 1>&2 - exit 1 - fi - coordinator_port=${FDB_COORDINATOR_PORT:-4500} - echo "docker:docker@$coordinator_ip:$coordinator_port" > $FDB_CLUSTER_FILE - else - echo "FDB_COORDINATOR environment variable not defined" 1>&2 - exit 1 - fi -} - -if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then - create_cluster_file "$@" -fi diff --git a/packaging/docker/release/create_server_environment.bash b/packaging/docker/release/create_server_environment.bash deleted file mode 100755 index 51a782f991c..00000000000 --- a/packaging/docker/release/create_server_environment.bash +++ /dev/null @@ -1,43 +0,0 @@ -#! /bin/bash - -# -# create_server_environment.bash -# -# This source file is part of the FoundationDB open source project -# -# Copyright 2013-2018 Apple Inc. 
and the FoundationDB project authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -source /var/fdb/scripts/create_cluster_file.bash - -function create_server_environment() { - env_file=/var/fdb/.fdbenv - - if [[ "$FDB_NETWORKING_MODE" == "host" ]]; then - public_ip=127.0.0.1 - elif [[ "$FDB_NETWORKING_MODE" == "container" ]]; then - public_ip=$(hostname -i | awk '{print $1}') - else - echo "Unknown FDB Networking mode \"$FDB_NETWORKING_MODE\"" 1>&2 - exit 1 - fi - - echo "export PUBLIC_IP=$public_ip" > $env_file - if [[ -z $FDB_COORDINATOR && -z "$FDB_CLUSTER_FILE_CONTENTS" ]]; then - FDB_CLUSTER_FILE_CONTENTS="docker:docker@$public_ip:$FDB_PORT" - fi - - create_cluster_file -} diff --git a/packaging/docker/release/download_multiversion_libraries.bash b/packaging/docker/release/download_multiversion_libraries.bash deleted file mode 100755 index 1cd5770ff3e..00000000000 --- a/packaging/docker/release/download_multiversion_libraries.bash +++ /dev/null @@ -1,31 +0,0 @@ -#! /bin/bash - -# -# download_multiversion_libraries.bash -# -# This source file is part of the FoundationDB open source project -# -# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -mkdir -p /usr/lib/fdb/multiversion -website=$1 -shift -for version in $*; do - origin=$website/downloads/$version/linux/libfdb_c_$version.so - destination=/usr/lib/fdb/multiversion/libfdb_c_$version.so - echo "Downloading $origin to $destination" - curl $origin -o $destination -done diff --git a/packaging/docker/release/fdb.bash b/packaging/docker/release/fdb.bash index 943c8ed58bc..5d23fc41339 100755 --- a/packaging/docker/release/fdb.bash +++ b/packaging/docker/release/fdb.bash @@ -1,11 +1,11 @@ -#! /bin/bash +#!/bin/bash # # fdb.bash # # This source file is part of the FoundationDB open source project # -# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors +# Copyright 2013-2021 Apple Inc. and the FoundationDB project authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,10 +20,49 @@ # limitations under the License. 
# -source /var/fdb/scripts/create_server_environment.bash +function create_cluster_file() { + FDB_CLUSTER_FILE=${FDB_CLUSTER_FILE:-/etc/foundationdb/fdb.cluster} + mkdir -p "$(dirname $FDB_CLUSTER_FILE)" + + if [[ -n "$FDB_CLUSTER_FILE_CONTENTS" ]]; then + echo "$FDB_CLUSTER_FILE_CONTENTS" > "$FDB_CLUSTER_FILE" + elif [[ -n $FDB_COORDINATOR ]]; then + coordinator_ip=$(dig +short "$FDB_COORDINATOR") + if [[ -z "$coordinator_ip" ]]; then + echo "Failed to look up coordinator address for $FDB_COORDINATOR" 1>&2 + exit 1 + fi + coordinator_port=${FDB_COORDINATOR_PORT:-4500} + echo "docker:docker@$coordinator_ip:$coordinator_port" > "$FDB_CLUSTER_FILE" + else + echo "FDB_COORDINATOR environment variable not defined" 1>&2 + exit 1 + fi +} + +function create_server_environment() { + env_file=/var/fdb/.fdbenv + + if [[ "$FDB_NETWORKING_MODE" == "host" ]]; then + public_ip=127.0.0.1 + elif [[ "$FDB_NETWORKING_MODE" == "container" ]]; then + public_ip=$(hostname -i | awk '{print $1}') + else + echo "Unknown FDB Networking mode \"$FDB_NETWORKING_MODE\"" 1>&2 + exit 1 + fi + + echo "export PUBLIC_IP=$public_ip" > $env_file + if [[ -z $FDB_COORDINATOR && -z "$FDB_CLUSTER_FILE_CONTENTS" ]]; then + FDB_CLUSTER_FILE_CONTENTS="docker:docker@$public_ip:$FDB_PORT" + fi + + create_cluster_file +} + create_server_environment source /var/fdb/.fdbenv echo "Starting FDB server on $PUBLIC_IP:$FDB_PORT" -fdbserver --listen_address 0.0.0.0:$FDB_PORT --public_address $PUBLIC_IP:$FDB_PORT \ - --datadir /var/fdb/data --logdir /var/fdb/logs \ - --locality_zoneid="$(hostname)" --locality_machineid="$(hostname)" --class $FDB_PROCESS_CLASS +fdbserver --listen_address 0.0.0.0:"$FDB_PORT" --public_address "$PUBLIC_IP:$FDB_PORT" \ + --datadir /var/fdb/data --logdir /var/fdb/logs \ + --locality_zoneid="$(hostname)" --locality_machineid="$(hostname)" --class "$FDB_PROCESS_CLASS" diff --git a/packaging/docker/sidecar/Dockerfile b/packaging/docker/sidecar/Dockerfile index b2d76693ecb..3c281f89875 100644 
--- a/packaging/docker/sidecar/Dockerfile +++ b/packaging/docker/sidecar/Dockerfile @@ -2,7 +2,7 @@ # # This source file is part of the FoundationDB open source project # -# Copyright 2018-2019 Apple Inc. and the FoundationDB project authors +# Copyright 2013-2021 Apple Inc. and the FoundationDB project authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,32 +20,38 @@ FROM python:3.9-slim RUN apt-get update && \ - apt-get install -y --no-install-recommends curl && \ - rm -rf /var/lub/apt/lists/* + apt-get install -y --no-install-recommends \ + curl && \ + pip install watchdog==0.9.0 && \ + rm -rf /var/lib/apt/lists/* -COPY misc/tini-amd64.sha256sum /tmp/ +WORKDIR /tmp # Adding tini as PID 1 https://github.com/krallin/tini -ARG TINI_VERSION=v0.19.0 -RUN curl -sLO https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-amd64 && \ - sha256sum -c /tmp/tini-amd64.sha256sum && \ - chmod +x tini-amd64 && \ - mv tini-amd64 /usr/bin/tini - -COPY sidecar/requirements.txt /tmp -RUN pip install -r tmp/requirements.txt - -ARG FDB_VERSION= +RUN curl -Ls https://github.com/krallin/tini/releases/download/v0.19.0/tini-amd64 -o tini && \ + echo "93dcc18adc78c65a028a84799ecf8ad40c936fdfc5f2a57b1acda5a8117fa82c tini" > tini-amd64.sha256sum && \ + sha256sum -c tini-amd64.sha256sum && \ + chmod +x tini && \ + mv tini /usr/bin/ && \ + rm -rf /tmp/* + +ARG FDB_VERSION ARG FDB_ADDITIONAL_VERSIONS="6.2.30 6.1.13" ARG FDB_WEBSITE=https://www.foundationdb.org -WORKDIR /var/fdb/tmp COPY website /mnt/website/ # Install FoundationDB Binaries RUN curl $FDB_WEBSITE/downloads/$FDB_VERSION/linux/fdb_$FDB_VERSION.tar.gz | tar zxf - --strip-components=1 && \ - chmod u+x fdbbackup fdbcli fdbdr fdbmonitor fdbrestore fdbserver backup_agent dr_agent && \ - mv fdbbackup fdbcli fdbdr fdbmonitor fdbrestore fdbserver backup_agent dr_agent /usr/bin && \ - rm -r /var/fdb/tmp + chmod u+x fdbbackup 
fdbcli fdbdr fdbmonitor fdbrestore fdbserver backup_agent dr_agent && \ + mv fdbbackup fdbcli fdbdr fdbmonitor fdbrestore fdbserver backup_agent dr_agent /usr/bin && \ + rm -rf /tmp/* + +# Install additional FoundationDB Client Libraries +RUN mkdir -p /var/fdb/lib && \ + for version in $FDB_ADDITIONAL_VERSIONS; do \ + curl $FDB_WEBSITE/downloads/$version/linux/libfdb_c_$version.so -o /var/fdb/lib/libfdb_c_${version%.*}.so; \ + done && \ + rm -rf /mnt/website WORKDIR / @@ -53,16 +59,10 @@ WORKDIR / ADD sidecar/entrypoint.bash sidecar/sidecar.py / RUN chmod a+x /entrypoint.bash /sidecar.py -# Install additional FoundationDB Client Libraries -RUN mkdir -p /var/fdb/lib && \ - for version in $FDB_ADDITIONAL_VERSIONS; do curl $FDB_WEBSITE/downloads/$version/linux/libfdb_c_$version.so -o /var/fdb/lib/libfdb_c_${version%.*}.so; done - -RUN rm -rf /mnt/website - -RUN echo ${FDB_VERSION} > /var/fdb/version && \ - mkdir -p /var/fdb/lib && \ - groupadd --gid 4059 fdb && \ - useradd --gid 4059 --uid 4059 --no-create-home --shell /bin/bash fdb +RUN echo ${FDB_VERSION} > /var/fdb/version && \ + mkdir -p /var/fdb/lib && \ + groupadd --gid 4059 fdb && \ + useradd --gid 4059 --uid 4059 --no-create-home --shell /bin/bash fdb VOLUME /var/input-files @@ -70,6 +70,8 @@ VOLUME /var/output-files USER fdb +# Runtime Configuration Options + ENV LISTEN_PORT 8080 ENTRYPOINT ["/usr/bin/tini", "-g", "--", "/entrypoint.bash"] diff --git a/packaging/docker/sidecar/entrypoint.bash b/packaging/docker/sidecar/entrypoint.bash index b6678fc831d..165f11bce4b 100755 --- a/packaging/docker/sidecar/entrypoint.bash +++ b/packaging/docker/sidecar/entrypoint.bash @@ -1,4 +1,4 @@ -#! 
/bin/bash +#!/bin/bash # entrypoint.bash # diff --git a/packaging/docker/sidecar/requirements.txt b/packaging/docker/sidecar/requirements.txt index c7fcc8bac8e..e69de29bb2d 100644 --- a/packaging/docker/sidecar/requirements.txt +++ b/packaging/docker/sidecar/requirements.txt @@ -1 +0,0 @@ -watchdog==0.9.0 \ No newline at end of file From 13613ab0f1b71dec6c233bcf7f22ff9fc6531a2c Mon Sep 17 00:00:00 2001 From: QA Hoang Date: Thu, 28 Oct 2021 15:00:08 -0700 Subject: [PATCH 20/69] fixed mako bug and added comment --- bindings/c/test/mako/mako.c | 5 ++++- bindings/c/test/mako/mako.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/bindings/c/test/mako/mako.c b/bindings/c/test/mako/mako.c index f2027c42178..3cbbd7d50f2 100644 --- a/bindings/c/test/mako/mako.c +++ b/bindings/c/test/mako/mako.c @@ -1297,12 +1297,15 @@ int worker_process_main(mako_args_t* args, int worker_id, mako_shmhdr_t* shm, pi if (args->client_threads_per_version > 0) { err = fdb_network_set_option( - FDB_NET_OPTION_CLIENT_THREADS_PER_VERSION, (uint8_t*)&args->client_threads_per_version, sizeof(uint32_t)); + FDB_NET_OPTION_CLIENT_THREADS_PER_VERSION, (uint8_t*)&args->client_threads_per_version, sizeof(int64_t)); if (err) { fprintf(stderr, "ERROR: fdb_network_set_option (FDB_NET_OPTION_CLIENT_THREADS_PER_VERSION) (%d): %s\n", (uint8_t*)&args->client_threads_per_version, fdb_get_error(err)); + // let's exit here since we do not want to confuse users + // that mako is running with multi-threaded client enabled + return -1; } } diff --git a/bindings/c/test/mako/mako.h b/bindings/c/test/mako/mako.h index 66a8039dcf0..2af3a7059b4 100644 --- a/bindings/c/test/mako/mako.h +++ b/bindings/c/test/mako/mako.h @@ -143,7 +143,7 @@ typedef struct { int txntagging; char txntagging_prefix[TAGPREFIXLENGTH_MAX]; FDBStreamingMode streaming_mode; - uint32_t client_threads_per_version; + int client_threads_per_version; int disable_ryw; char json_output_path[PATH_MAX]; } mako_args_t; From 
78e36e75902a904d2cf3fb28e90f65f3e146d4ea Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Fri, 29 Oct 2021 11:18:47 -0700 Subject: [PATCH 21/69] fix: simulation only validation could throw errors which would impact the behavior of the cluster controller --- fdbserver/ClusterController.actor.cpp | 255 ++++++++++++++------------ 1 file changed, 134 insertions(+), 121 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp b/fdbserver/ClusterController.actor.cpp index 1456319a3fe..e529b023c24 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1214,40 +1214,44 @@ class ClusterControllerData { exclusionWorkerIds); if (g_network->isSimulated()) { - auto testWorkers = getWorkersForTlogsBackup( - conf, required, desired, policy, testUsed, checkStable, dcIds, exclusionWorkerIds); - RoleFitness testFitness(testWorkers, ProcessClass::TLog, testUsed); - RoleFitness fitness(workers, ProcessClass::TLog, id_used); - - std::map>, int> field_count; - std::set>> zones; - for (auto& worker : testWorkers) { - if (!zones.count(worker.interf.locality.zoneId())) { - field_count[worker.interf.locality.get(pa1->attributeKey())]++; - zones.insert(worker.interf.locality.zoneId()); + try { + auto testWorkers = getWorkersForTlogsBackup( + conf, required, desired, policy, testUsed, checkStable, dcIds, exclusionWorkerIds); + RoleFitness testFitness(testWorkers, ProcessClass::TLog, testUsed); + RoleFitness fitness(workers, ProcessClass::TLog, id_used); + + std::map>, int> field_count; + std::set>> zones; + for (auto& worker : testWorkers) { + if (!zones.count(worker.interf.locality.zoneId())) { + field_count[worker.interf.locality.get(pa1->attributeKey())]++; + zones.insert(worker.interf.locality.zoneId()); + } } - } - // backup recruitment is not required to use degraded processes that have better fitness - // so we cannot compare degraded between the two methods - testFitness.degraded = fitness.degraded; - - int minField = 100; + // 
backup recruitment is not required to use degraded processes that have better fitness + // so we cannot compare degraded between the two methods + testFitness.degraded = fitness.degraded; - for (auto& f : field_count) { - minField = std::min(minField, f.second); - } + int minField = 100; - if (fitness > testFitness && minField > 1) { - for (auto& w : testWorkers) { - TraceEvent("TestTLogs").detail("Interf", w.interf.address()); + for (auto& f : field_count) { + minField = std::min(minField, f.second); } - for (auto& w : workers) { - TraceEvent("RealTLogs").detail("Interf", w.interf.address()); + + if (fitness > testFitness && minField > 1) { + for (auto& w : testWorkers) { + TraceEvent("TestTLogs").detail("Interf", w.interf.address()); + } + for (auto& w : workers) { + TraceEvent("RealTLogs").detail("Interf", w.interf.address()); + } + TraceEvent("FitnessCompare") + .detail("TestF", testFitness.toString()) + .detail("RealF", fitness.toString()); + ASSERT(false); } - TraceEvent("FitnessCompare") - .detail("TestF", testFitness.toString()) - .detail("RealF", fitness.toString()); - ASSERT(false); + } catch (Error& e) { + ASSERT(false); // Simulation only validation should not throw errors } } @@ -1267,25 +1271,29 @@ class ClusterControllerData { getWorkersForTlogsSimple(conf, required, desired, id_used, checkStable, dcIds, exclusionWorkerIds); if (g_network->isSimulated()) { - auto testWorkers = getWorkersForTlogsBackup( - conf, required, desired, policy, testUsed, checkStable, dcIds, exclusionWorkerIds); - RoleFitness testFitness(testWorkers, ProcessClass::TLog, testUsed); - RoleFitness fitness(workers, ProcessClass::TLog, id_used); - // backup recruitment is not required to use degraded processes that have better fitness - // so we cannot compare degraded between the two methods - testFitness.degraded = fitness.degraded; - - if (fitness > testFitness) { - for (auto& w : testWorkers) { - TraceEvent("TestTLogs").detail("Interf", w.interf.address()); - } - for (auto& w 
: workers) { - TraceEvent("RealTLogs").detail("Interf", w.interf.address()); + try { + auto testWorkers = getWorkersForTlogsBackup( + conf, required, desired, policy, testUsed, checkStable, dcIds, exclusionWorkerIds); + RoleFitness testFitness(testWorkers, ProcessClass::TLog, testUsed); + RoleFitness fitness(workers, ProcessClass::TLog, id_used); + // backup recruitment is not required to use degraded processes that have better fitness + // so we cannot compare degraded between the two methods + testFitness.degraded = fitness.degraded; + + if (fitness > testFitness) { + for (auto& w : testWorkers) { + TraceEvent("TestTLogs").detail("Interf", w.interf.address()); + } + for (auto& w : workers) { + TraceEvent("RealTLogs").detail("Interf", w.interf.address()); + } + TraceEvent("FitnessCompare") + .detail("TestF", testFitness.toString()) + .detail("RealF", fitness.toString()); + ASSERT(false); } - TraceEvent("FitnessCompare") - .detail("TestF", testFitness.toString()) - .detail("RealF", fitness.toString()); - ASSERT(false); + } catch (Error& e) { + ASSERT(false); // Simulation only validation should not throw errors } } return workers; @@ -2119,82 +2127,87 @@ class ClusterControllerData { RecruitFromConfigurationReply findWorkersForConfiguration(RecruitFromConfigurationRequest const& req) { RecruitFromConfigurationReply rep = findWorkersForConfigurationDispatch(req); if (g_network->isSimulated()) { - // FIXME: The logic to pick a satellite in a remote region is not - // deterministic and can therefore break this nondeterminism check. - // Since satellites will generally be in the primary region, - // disable the determinism check for remote region satellites. 
- bool remoteDCUsedAsSatellite = false; - if (req.configuration.regions.size() > 1) { - auto [region, remoteRegion] = - getPrimaryAndRemoteRegion(req.configuration.regions, req.configuration.regions[0].dcId); - for (const auto& satellite : region.satellites) { - if (satellite.dcId == remoteRegion.dcId) { - remoteDCUsedAsSatellite = true; + try { + // FIXME: The logic to pick a satellite in a remote region is not + // deterministic and can therefore break this nondeterminism check. + // Since satellites will generally be in the primary region, + // disable the determinism check for remote region satellites. + bool remoteDCUsedAsSatellite = false; + if (req.configuration.regions.size() > 1) { + auto [region, remoteRegion] = + getPrimaryAndRemoteRegion(req.configuration.regions, req.configuration.regions[0].dcId); + for (const auto& satellite : region.satellites) { + if (satellite.dcId == remoteRegion.dcId) { + remoteDCUsedAsSatellite = true; + } } } - } - if (!remoteDCUsedAsSatellite) { - RecruitFromConfigurationReply compare = findWorkersForConfigurationDispatch(req); - - std::map>, int> firstUsed; - std::map>, int> secondUsed; - updateKnownIds(&firstUsed); - updateKnownIds(&secondUsed); - - // auto mworker = id_worker.find(masterProcessId); - //TraceEvent("CompareAddressesMaster") - // .detail("Master", - // mworker != id_worker.end() ? 
mworker->second.details.interf.address() : NetworkAddress()); - - updateIdUsed(rep.tLogs, firstUsed); - updateIdUsed(compare.tLogs, secondUsed); - compareWorkers( - req.configuration, rep.tLogs, firstUsed, compare.tLogs, secondUsed, ProcessClass::TLog, "TLog"); - updateIdUsed(rep.satelliteTLogs, firstUsed); - updateIdUsed(compare.satelliteTLogs, secondUsed); - compareWorkers(req.configuration, - rep.satelliteTLogs, - firstUsed, - compare.satelliteTLogs, - secondUsed, - ProcessClass::TLog, - "Satellite"); - updateIdUsed(rep.commitProxies, firstUsed); - updateIdUsed(compare.commitProxies, secondUsed); - updateIdUsed(rep.grvProxies, firstUsed); - updateIdUsed(compare.grvProxies, secondUsed); - updateIdUsed(rep.resolvers, firstUsed); - updateIdUsed(compare.resolvers, secondUsed); - compareWorkers(req.configuration, - rep.commitProxies, - firstUsed, - compare.commitProxies, - secondUsed, - ProcessClass::CommitProxy, - "CommitProxy"); - compareWorkers(req.configuration, - rep.grvProxies, - firstUsed, - compare.grvProxies, - secondUsed, - ProcessClass::GrvProxy, - "GrvProxy"); - compareWorkers(req.configuration, - rep.resolvers, - firstUsed, - compare.resolvers, - secondUsed, - ProcessClass::Resolver, - "Resolver"); - updateIdUsed(rep.backupWorkers, firstUsed); - updateIdUsed(compare.backupWorkers, secondUsed); - compareWorkers(req.configuration, - rep.backupWorkers, - firstUsed, - compare.backupWorkers, - secondUsed, - ProcessClass::Backup, - "Backup"); + if (!remoteDCUsedAsSatellite) { + RecruitFromConfigurationReply compare = findWorkersForConfigurationDispatch(req); + + std::map>, int> firstUsed; + std::map>, int> secondUsed; + updateKnownIds(&firstUsed); + updateKnownIds(&secondUsed); + + // auto mworker = id_worker.find(masterProcessId); + //TraceEvent("CompareAddressesMaster") + // .detail("Master", + // mworker != id_worker.end() ? 
mworker->second.details.interf.address() : + // NetworkAddress()); + + updateIdUsed(rep.tLogs, firstUsed); + updateIdUsed(compare.tLogs, secondUsed); + compareWorkers( + req.configuration, rep.tLogs, firstUsed, compare.tLogs, secondUsed, ProcessClass::TLog, "TLog"); + updateIdUsed(rep.satelliteTLogs, firstUsed); + updateIdUsed(compare.satelliteTLogs, secondUsed); + compareWorkers(req.configuration, + rep.satelliteTLogs, + firstUsed, + compare.satelliteTLogs, + secondUsed, + ProcessClass::TLog, + "Satellite"); + updateIdUsed(rep.commitProxies, firstUsed); + updateIdUsed(compare.commitProxies, secondUsed); + updateIdUsed(rep.grvProxies, firstUsed); + updateIdUsed(compare.grvProxies, secondUsed); + updateIdUsed(rep.resolvers, firstUsed); + updateIdUsed(compare.resolvers, secondUsed); + compareWorkers(req.configuration, + rep.commitProxies, + firstUsed, + compare.commitProxies, + secondUsed, + ProcessClass::CommitProxy, + "CommitProxy"); + compareWorkers(req.configuration, + rep.grvProxies, + firstUsed, + compare.grvProxies, + secondUsed, + ProcessClass::GrvProxy, + "GrvProxy"); + compareWorkers(req.configuration, + rep.resolvers, + firstUsed, + compare.resolvers, + secondUsed, + ProcessClass::Resolver, + "Resolver"); + updateIdUsed(rep.backupWorkers, firstUsed); + updateIdUsed(compare.backupWorkers, secondUsed); + compareWorkers(req.configuration, + rep.backupWorkers, + firstUsed, + compare.backupWorkers, + secondUsed, + ProcessClass::Backup, + "Backup"); + } + } catch (Error& e) { + ASSERT(false); // Simulation only validation should not throw errors } } return rep; From ee00135a6b5c26cd3c821b2e38995c92663b8b9e Mon Sep 17 00:00:00 2001 From: Evan Tschannen Date: Fri, 29 Oct 2021 16:42:48 -0700 Subject: [PATCH 22/69] skip good recruitment errors when doing simulation only validation --- fdbserver/ClusterController.actor.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fdbserver/ClusterController.actor.cpp 
b/fdbserver/ClusterController.actor.cpp index e529b023c24..8691e11d73d 100644 --- a/fdbserver/ClusterController.actor.cpp +++ b/fdbserver/ClusterController.actor.cpp @@ -1671,7 +1671,8 @@ class ClusterControllerData { } ErrorOr findWorkersForConfigurationFromDC(RecruitFromConfigurationRequest const& req, - Optional dcId) { + Optional dcId, + bool checkGoodRecruitment) { RecruitFromConfigurationReply result; std::map>, int> id_used; updateKnownIds(&id_used); @@ -1782,7 +1783,7 @@ class ClusterControllerData { [](const WorkerDetails& w) { return w.interf; }); } - if (!goodRecruitmentTime.isReady() && + if (!goodRecruitmentTime.isReady() && checkGoodRecruitment && (RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog) .betterCount(RoleFitness(tlogs, ProcessClass::TLog, id_used)) || (region.satelliteTLogReplicationFactor > 0 && req.configuration.usableRegions > 1 && @@ -1808,7 +1809,8 @@ class ClusterControllerData { return result; } - RecruitFromConfigurationReply findWorkersForConfigurationDispatch(RecruitFromConfigurationRequest const& req) { + RecruitFromConfigurationReply findWorkersForConfigurationDispatch(RecruitFromConfigurationRequest const& req, + bool checkGoodRecruitment) { if (req.configuration.regions.size() > 1) { std::vector regions = req.configuration.regions; if (regions[0].priority == regions[1].priority && regions[1].dcId == clusterControllerDcId.get()) { @@ -1845,7 +1847,7 @@ class ClusterControllerData { bool setPrimaryDesired = false; try { - auto reply = findWorkersForConfigurationFromDC(req, regions[0].dcId); + auto reply = findWorkersForConfigurationFromDC(req, regions[0].dcId, checkGoodRecruitment); setPrimaryDesired = true; std::vector> dcPriority; dcPriority.push_back(regions[0].dcId); @@ -1862,7 +1864,8 @@ class ClusterControllerData { .detail("RecruitedTxnSystemDcId", regions[0].dcId); throw no_more_servers(); } catch (Error& e) { - if (!goodRemoteRecruitmentTime.isReady() && 
regions[1].dcId != clusterControllerDcId.get()) { + if (!goodRemoteRecruitmentTime.isReady() && regions[1].dcId != clusterControllerDcId.get() && + checkGoodRecruitment) { throw operation_failed(); } @@ -1872,7 +1875,7 @@ class ClusterControllerData { TraceEvent(SevWarn, "AttemptingRecruitmentInRemoteDc", id) .detail("SetPrimaryDesired", setPrimaryDesired) .error(e); - auto reply = findWorkersForConfigurationFromDC(req, regions[1].dcId); + auto reply = findWorkersForConfigurationFromDC(req, regions[1].dcId, checkGoodRecruitment); if (!setPrimaryDesired) { std::vector> dcPriority; dcPriority.push_back(regions[1].dcId); @@ -1890,7 +1893,8 @@ class ClusterControllerData { std::vector> dcPriority; dcPriority.push_back(req.configuration.regions[0].dcId); desiredDcIds.set(dcPriority); - auto reply = findWorkersForConfigurationFromDC(req, req.configuration.regions[0].dcId); + auto reply = + findWorkersForConfigurationFromDC(req, req.configuration.regions[0].dcId, checkGoodRecruitment); if (reply.isError()) { throw reply.getError(); } else if (req.configuration.regions[0].dcId == clusterControllerDcId.get()) { @@ -2059,7 +2063,7 @@ class ClusterControllerData { .detail("DesiredResolvers", req.configuration.getDesiredResolvers()) .detail("ActualResolvers", result.resolvers.size()); - if (!goodRecruitmentTime.isReady() && + if (!goodRecruitmentTime.isReady() && checkGoodRecruitment && (RoleFitness( SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog) .betterCount(RoleFitness(tlogs, ProcessClass::TLog, id_used)) || @@ -2125,7 +2129,7 @@ class ClusterControllerData { } RecruitFromConfigurationReply findWorkersForConfiguration(RecruitFromConfigurationRequest const& req) { - RecruitFromConfigurationReply rep = findWorkersForConfigurationDispatch(req); + RecruitFromConfigurationReply rep = findWorkersForConfigurationDispatch(req, true); if (g_network->isSimulated()) { try { // FIXME: The logic to pick a satellite in a remote region is not 
@@ -2143,7 +2147,7 @@ class ClusterControllerData { } } if (!remoteDCUsedAsSatellite) { - RecruitFromConfigurationReply compare = findWorkersForConfigurationDispatch(req); + RecruitFromConfigurationReply compare = findWorkersForConfigurationDispatch(req, false); std::map>, int> firstUsed; std::map>, int> secondUsed; From b0cec2984946b44251ab4c08423a89b6f35bf7ac Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Thu, 28 Oct 2021 11:53:53 -0700 Subject: [PATCH 23/69] Enable unused-local-typedef clang warning --- cmake/ConfigureCompiler.cmake | 1 - fdbclient/json_spirit/json_spirit_writer_template.h | 2 -- 2 files changed, 3 deletions(-) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index 6379f7bf143..d49a2befe40 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -293,7 +293,6 @@ else() -Wno-unknown-pragmas -Wno-unknown-warning-option -Wno-unused-function - -Wno-unused-local-typedef -Wno-unused-parameter ) if (USE_CCACHE) diff --git a/fdbclient/json_spirit/json_spirit_writer_template.h b/fdbclient/json_spirit/json_spirit_writer_template.h index 1422ee272c0..0bf3f5d2e5d 100644 --- a/fdbclient/json_spirit/json_spirit_writer_template.h +++ b/fdbclient/json_spirit/json_spirit_writer_template.h @@ -32,8 +32,6 @@ inline char to_hex_char(unsigned int c) { template String_type non_printable_to_string(unsigned int c) { - typedef typename String_type::value_type Char_type; - String_type result(6, '\\'); result[1] = 'u'; From 25257f6f87155644c8a20a50db3a525313b4b891 Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Thu, 28 Oct 2021 12:42:24 -0700 Subject: [PATCH 24/69] Enable unused-function warning for clang --- cmake/ConfigureCompiler.cmake | 1 - fdbclient/ClientLibManagement.actor.cpp | 4 +- fdbclient/DatabaseConfiguration.cpp | 3 - fdbclient/FDBTypes.cpp | 8 ++ fdbclient/FDBTypes.h | 8 +- fdbserver/DeltaTree.h | 4 +- fdbserver/OldTLogServer_6_2.actor.cpp | 2 +- fdbserver/RestoreController.actor.cpp | 
140 ++++++++++++------------ fdbserver/SimulatedCluster.actor.cpp | 6 +- flow/Tracing.actor.cpp | 34 +++--- flow/crc32c.cpp | 4 +- 11 files changed, 111 insertions(+), 103 deletions(-) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index d49a2befe40..400cead8113 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -292,7 +292,6 @@ else() -Wno-undefined-var-template -Wno-unknown-pragmas -Wno-unknown-warning-option - -Wno-unused-function -Wno-unused-parameter ) if (USE_CCACHE) diff --git a/fdbclient/ClientLibManagement.actor.cpp b/fdbclient/ClientLibManagement.actor.cpp index 8b24956ee3d..9ca0571e325 100644 --- a/fdbclient/ClientLibManagement.actor.cpp +++ b/fdbclient/ClientLibManagement.actor.cpp @@ -198,7 +198,7 @@ KeyRef chunkKeyFromNo(StringRef clientLibBinPrefix, size_t chunkNo, Arena& arena return clientLibBinPrefix.withSuffix(format("%06zu", chunkNo), arena); } -ClientLibPlatform getCurrentClientPlatform() { +[[maybe_unused]] ClientLibPlatform getCurrentClientPlatform() { #ifdef __x86_64__ #if defined(_WIN32) return ClientLibPlatform::X86_64_WINDOWS; @@ -707,4 +707,4 @@ ACTOR Future>> listClientLibraries(Database db, return result; } -} // namespace ClientLibManagement \ No newline at end of file +} // namespace ClientLibManagement diff --git a/fdbclient/DatabaseConfiguration.cpp b/fdbclient/DatabaseConfiguration.cpp index c2cb04bb2f5..d778b35845c 100644 --- a/fdbclient/DatabaseConfiguration.cpp +++ b/fdbclient/DatabaseConfiguration.cpp @@ -578,9 +578,6 @@ bool DatabaseConfiguration::setInternal(KeyRef key, ValueRef value) { return true; // All of the above options currently require recovery to take effect } -static KeyValueRef* lower_bound(VectorRef& config, KeyRef const& key) { - return std::lower_bound(config.begin(), config.end(), KeyValueRef(key, ValueRef()), KeyValueRef::OrderByKey()); -} static KeyValueRef const* lower_bound(VectorRef const& config, KeyRef const& key) { return 
std::lower_bound(config.begin(), config.end(), KeyValueRef(key, ValueRef()), KeyValueRef::OrderByKey()); } diff --git a/fdbclient/FDBTypes.cpp b/fdbclient/FDBTypes.cpp index 3639776e2d7..8ada7f2c083 100644 --- a/fdbclient/FDBTypes.cpp +++ b/fdbclient/FDBTypes.cpp @@ -65,3 +65,11 @@ std::string KeySelectorRef::toString() const { return format("%d+lastLessThan(%s)", offset, printable(key).c_str()); } } + +std::string describe(const std::string& s) { + return s; +} + +std::string describe(UID const& item) { + return item.shortString(); +} diff --git a/fdbclient/FDBTypes.h b/fdbclient/FDBTypes.h index 17ad22b93e1..d0eb4c0d0bb 100644 --- a/fdbclient/FDBTypes.h +++ b/fdbclient/FDBTypes.h @@ -188,18 +188,14 @@ inline std::string describe(const int item) { } // Allows describeList to work on a vector of std::string -static std::string describe(const std::string& s) { - return s; -} +std::string describe(const std::string& s); template std::string describe(Reference const& item) { return item->toString(); } -static std::string describe(UID const& item) { - return item.shortString(); -} +std::string describe(UID const& item); template std::string describe(T const& item) { diff --git a/fdbserver/DeltaTree.h b/fdbserver/DeltaTree.h index c1219bd71a9..2c1f7ea91fc 100644 --- a/fdbserver/DeltaTree.h +++ b/fdbserver/DeltaTree.h @@ -94,7 +94,7 @@ static int __lessOrEqualPowerOfTwo(unsigned int n) { } */ -static int perfectSubtreeSplitPoint(int subtree_size) { +static inline int perfectSubtreeSplitPoint(int subtree_size) { // return the inorder index of the root node in a subtree of the given size // consistent with the resulting binary search tree being "perfect" (having minimal height // and all missing nodes as far right as possible). 
@@ -103,7 +103,7 @@ static int perfectSubtreeSplitPoint(int subtree_size) { return std::min(s * 2 + 1, subtree_size - s - 1); } -static int perfectSubtreeSplitPointCached(int subtree_size) { +static inline int perfectSubtreeSplitPointCached(int subtree_size) { static uint16_t* points = nullptr; static const int max = 500; if (points == nullptr) { diff --git a/fdbserver/OldTLogServer_6_2.actor.cpp b/fdbserver/OldTLogServer_6_2.actor.cpp index e25b80f8818..4893a5da032 100644 --- a/fdbserver/OldTLogServer_6_2.actor.cpp +++ b/fdbserver/OldTLogServer_6_2.actor.cpp @@ -267,7 +267,7 @@ static StringRef stripTagMessagesKey(StringRef key) { return key.substr(sizeof(UID) + sizeof(Tag) + persistTagMessagesKeys.begin.size()); } -static StringRef stripTagMessageRefsKey(StringRef key) { +[[maybe_unused]] static StringRef stripTagMessageRefsKey(StringRef key) { return key.substr(sizeof(UID) + sizeof(Tag) + persistTagMessageRefsKeys.begin.size()); } diff --git a/fdbserver/RestoreController.actor.cpp b/fdbserver/RestoreController.actor.cpp index 860e6b2e569..a9d0444b741 100644 --- a/fdbserver/RestoreController.actor.cpp +++ b/fdbserver/RestoreController.actor.cpp @@ -37,7 +37,8 @@ #include "flow/Platform.h" #include "flow/actorcompiler.h" // This must be the last #include. 
-ACTOR static Future clearDB(Database cx); +// TODO: Support [[maybe_unused]] attribute for actors +// ACTOR static Future clearDB(Database cx); ACTOR static Future collectBackupFiles(Reference bc, std::vector* rangeFiles, std::vector* logFiles, @@ -76,7 +77,8 @@ ACTOR static Future notifyLoadersVersionBatchFinished(std::map notifyRestoreCompleted(Reference self, bool terminate); ACTOR static Future signalRestoreCompleted(Reference self, Database cx); -ACTOR static Future updateHeartbeatTime(Reference self); +// TODO: Support [[maybe_unused]] attribute for actors +// ACTOR static Future updateHeartbeatTime(Reference self); ACTOR static Future checkRolesLiveness(Reference self); void splitKeyRangeForAppliers(Reference batchData, @@ -900,16 +902,18 @@ ACTOR static Future buildRangeVersions(KeyRangeMap* pRangeVersion return Void(); } +/* ACTOR static Future clearDB(Database cx) { - wait(runRYWTransaction(cx, [](Reference tr) -> Future { - tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); - tr->setOption(FDBTransactionOptions::LOCK_AWARE); - tr->clear(normalKeys); - return Void(); - })); - - return Void(); + wait(runRYWTransaction(cx, [](Reference tr) -> Future { + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr->setOption(FDBTransactionOptions::LOCK_AWARE); + tr->clear(normalKeys); + return Void(); + })); + + return Void(); } +*/ ACTOR static Future initializeVersionBatch(std::map appliersInterf, std::map loadersInterf, @@ -1135,67 +1139,69 @@ ACTOR static Future signalRestoreCompleted(Reference updateHeartbeatTime(Reference self) { - wait(self->recruitedRoles.getFuture()); - - int numRoles = self->loadersInterf.size() + self->appliersInterf.size(); - state std::map::iterator loader = self->loadersInterf.begin(); - state std::map::iterator applier = self->appliersInterf.begin(); - state std::vector> fReplies(numRoles, Never()); // TODO: Reserve memory for this vector - state std::vector nodes; - state int index = 0; - state Future fTimeout = 
Void(); - - // Initialize nodes only once - std::transform(self->loadersInterf.begin(), - self->loadersInterf.end(), - std::back_inserter(nodes), - [](const std::pair& in) { return in.first; }); - std::transform(self->appliersInterf.begin(), - self->appliersInterf.end(), - std::back_inserter(nodes), - [](const std::pair& in) { return in.first; }); - - loop { - loader = self->loadersInterf.begin(); - applier = self->appliersInterf.begin(); - index = 0; - std::fill(fReplies.begin(), fReplies.end(), Never()); - // ping loaders and appliers - while (loader != self->loadersInterf.end()) { - fReplies[index] = loader->second.heartbeat.getReply(RestoreSimpleRequest()); - loader++; - index++; - } - while (applier != self->appliersInterf.end()) { - fReplies[index] = applier->second.heartbeat.getReply(RestoreSimpleRequest()); - applier++; - index++; - } - - fTimeout = delay(SERVER_KNOBS->FASTRESTORE_HEARTBEAT_DELAY); - - // Here we have to handle error, otherwise controller worker will fail and exit. - try { - wait(waitForAll(fReplies) || fTimeout); - } catch (Error& e) { - // This should be an ignorable error. - TraceEvent(g_network->isSimulated() ? 
SevWarnAlways : SevError, "FastRestoreUpdateHeartbeatError").error(e); - } - - // Update the most recent heart beat time for each role - for (int i = 0; i < fReplies.size(); ++i) { - if (!fReplies[i].isError() && fReplies[i].isReady()) { - double currentTime = now(); - auto item = self->rolesHeartBeatTime.emplace(nodes[i], currentTime); - item.first->second = currentTime; - } - } - wait(fTimeout); // Ensure not updating heartbeat too quickly - } + wait(self->recruitedRoles.getFuture()); + + int numRoles = self->loadersInterf.size() + self->appliersInterf.size(); + state std::map::iterator loader = self->loadersInterf.begin(); + state std::map::iterator applier = self->appliersInterf.begin(); + state std::vector> fReplies(numRoles, Never()); // TODO: Reserve memory for this vector + state std::vector nodes; + state int index = 0; + state Future fTimeout = Void(); + + // Initialize nodes only once + std::transform(self->loadersInterf.begin(), + self->loadersInterf.end(), + std::back_inserter(nodes), + [](const std::pair& in) { return in.first; }); + std::transform(self->appliersInterf.begin(), + self->appliersInterf.end(), + std::back_inserter(nodes), + [](const std::pair& in) { return in.first; }); + + loop { + loader = self->loadersInterf.begin(); + applier = self->appliersInterf.begin(); + index = 0; + std::fill(fReplies.begin(), fReplies.end(), Never()); + // ping loaders and appliers + while (loader != self->loadersInterf.end()) { + fReplies[index] = loader->second.heartbeat.getReply(RestoreSimpleRequest()); + loader++; + index++; + } + while (applier != self->appliersInterf.end()) { + fReplies[index] = applier->second.heartbeat.getReply(RestoreSimpleRequest()); + applier++; + index++; + } + + fTimeout = delay(SERVER_KNOBS->FASTRESTORE_HEARTBEAT_DELAY); + + // Here we have to handle error, otherwise controller worker will fail and exit. + try { + wait(waitForAll(fReplies) || fTimeout); + } catch (Error& e) { + // This should be an ignorable error. 
+ TraceEvent(g_network->isSimulated() ? SevWarnAlways : SevError, "FastRestoreUpdateHeartbeatError").error(e); + } + + // Update the most recent heart beat time for each role + for (int i = 0; i < fReplies.size(); ++i) { + if (!fReplies[i].isError() && fReplies[i].isReady()) { + double currentTime = now(); + auto item = self->rolesHeartBeatTime.emplace(nodes[i], currentTime); + item.first->second = currentTime; + } + } + wait(fTimeout); // Ensure not updating heartbeat too quickly + } } +*/ // Check if a restore role dies or disconnected ACTOR static Future checkRolesLiveness(Reference self) { diff --git a/fdbserver/SimulatedCluster.actor.cpp b/fdbserver/SimulatedCluster.actor.cpp index 9062c20b588..62d1f71db9d 100644 --- a/fdbserver/SimulatedCluster.actor.cpp +++ b/fdbserver/SimulatedCluster.actor.cpp @@ -1223,7 +1223,7 @@ void SimulationConfig::set_config(std::string config) { db.set(kv.first, kv.second); } -StringRef StringRefOf(const char* s) { +[[maybe_unused]] StringRef StringRefOf(const char* s) { return StringRef((uint8_t*)s, strlen(s)); } @@ -2188,7 +2188,7 @@ bool rocksDBEnabled = false; #endif // Populates the TestConfig fields according to what is found in the test file. -void checkTestConf(const char* testFile, TestConfig* testConfig) {} +[[maybe_unused]] void checkTestConf(const char* testFile, TestConfig* testConfig) {} } // namespace @@ -2301,4 +2301,4 @@ ACTOR void setupAndRun(std::string dataFolder, destructed = true; wait(Never()); ASSERT(false); -} \ No newline at end of file +} diff --git a/flow/Tracing.actor.cpp b/flow/Tracing.actor.cpp index 4cb35bc117b..173fbe1196b 100644 --- a/flow/Tracing.actor.cpp +++ b/flow/Tracing.actor.cpp @@ -122,26 +122,28 @@ ACTOR Future simulationStartServer() { } } +/* // Runs on an interval, printing debug information and performing other // connection tasks. 
ACTOR Future traceLog(int* pendingMessages, bool* sendError) { - state bool sendErrorReset = false; - - loop { - TraceEvent("TracingSpanQueueSize").detail("PendingMessages", *pendingMessages); - - // Wait at least one full loop before attempting to send messages - // again. - if (sendErrorReset) { - sendErrorReset = false; - *sendError = false; - } else if (*sendError) { - sendErrorReset = true; - } - - wait(delay(kQueueSizeLogInterval)); - } + state bool sendErrorReset = false; + + loop { + TraceEvent("TracingSpanQueueSize").detail("PendingMessages", *pendingMessages); + + // Wait at least one full loop before attempting to send messages + // again. + if (sendErrorReset) { + sendErrorReset = false; + *sendError = false; + } else if (*sendError) { + sendErrorReset = true; + } + + wait(delay(kQueueSizeLogInterval)); + } } +*/ struct UDPTracer : public ITracer { protected: diff --git a/flow/crc32c.cpp b/flow/crc32c.cpp index 759bfd31efb..e9339333a56 100644 --- a/flow/crc32c.cpp +++ b/flow/crc32c.cpp @@ -37,7 +37,7 @@ #include "flow/Platform.h" #include "crc32c-generated-constants.cpp" -static uint32_t append_trivial(uint32_t crc, const uint8_t* input, size_t length) { +[[maybe_unused]] static uint32_t append_trivial(uint32_t crc, const uint8_t* input, size_t length) { for (size_t i = 0; i < length; ++i) { crc = crc ^ input[i]; for (int j = 0; j < 8; j++) @@ -49,7 +49,7 @@ static uint32_t append_trivial(uint32_t crc, const uint8_t* input, size_t length /* Table-driven software version as a fall-back. This is about 15 times slower than using the hardware instructions. This assumes little-endian integers, as is the case on Intel processors that the assembler code here is for. 
*/ -static uint32_t append_adler_table(uint32_t crci, const uint8_t* input, size_t length) { +[[maybe_unused]] static uint32_t append_adler_table(uint32_t crci, const uint8_t* input, size_t length) { const uint8_t* next = input; uint64_t crc; From 168e75bb1e88894a77865d9482fec0bd988fb10a Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Thu, 28 Oct 2021 13:25:16 -0700 Subject: [PATCH 25/69] Remove unused shouldNotHaveFriends* functions --- fdbrpc/FlowTests.actor.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fdbrpc/FlowTests.actor.cpp b/fdbrpc/FlowTests.actor.cpp index 3d67b25e0e5..65d3d36019f 100644 --- a/fdbrpc/FlowTests.actor.cpp +++ b/fdbrpc/FlowTests.actor.cpp @@ -1287,8 +1287,6 @@ TEST_CASE("/fdbrpc/flow/wait_expression_after_cancel") { template struct ShouldNotGoIntoClassContextStack; -ACTOR static Future shouldNotHaveFriends(); - class Foo1 { public: explicit Foo1(int x) : x(x) {} @@ -1363,8 +1361,6 @@ ACTOR Future Outer::Foo5::fooActor(Outer::Foo5* self) { return self->x; } -ACTOR static Future shouldNotHaveFriends2(); - // Meant to be run with -fsanitize=undefined TEST_CASE("/flow/DeterministicRandom/SignedOverflow") { deterministicRandom()->randomInt(std::numeric_limits::min(), 0); From ebcc023b6f2d1a157080d3e71ebc03f44db222a3 Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Thu, 28 Oct 2021 13:52:52 -0700 Subject: [PATCH 26/69] Enable missing-field-initializers clang warning --- bindings/java/JavaWorkload.cpp | 6 +++--- cmake/ConfigureCompiler.cmake | 1 - fdbclient/FileBackupAgent.actor.cpp | 8 ++++---- fdbclient/SpecialKeySpace.actor.cpp | 2 +- fdbrpc/FlowTransport.h | 4 ++-- fdbserver/VersionedBTree.actor.cpp | 4 ++-- flow/network.h | 2 +- 7 files changed, 13 insertions(+), 14 deletions(-) diff --git a/bindings/java/JavaWorkload.cpp b/bindings/java/JavaWorkload.cpp index 555a6cb434b..1bf6c7ff4f6 100644 --- a/bindings/java/JavaWorkload.cpp +++ b/bindings/java/JavaWorkload.cpp @@ -176,9 +176,9 @@ void promiseSend(JNIEnv, 
jclass, jlong self, jboolean value) { struct JNIError { JNIEnv* env; - jthrowable throwable = nullptr; - const char* file; - int line; + jthrowable throwable{ nullptr }; + const char* file{ nullptr }; + int line{ 0 }; std::string location() const { if (file == nullptr) { diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index 400cead8113..61d800c30ca 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -286,7 +286,6 @@ else() -Wno-delete-non-virtual-dtor -Wno-format -Wno-mismatched-tags - -Wno-missing-field-initializers -Wno-sign-compare -Wno-tautological-pointer-compare -Wno-undefined-var-template diff --git a/fdbclient/FileBackupAgent.actor.cpp b/fdbclient/FileBackupAgent.actor.cpp index b42b192435d..3c6dba50e2e 100644 --- a/fdbclient/FileBackupAgent.actor.cpp +++ b/fdbclient/FileBackupAgent.actor.cpp @@ -193,10 +193,10 @@ class RestoreConfig : public KeyBackedConfig { struct RestoreFile { Version version; std::string fileName; - bool isRange; // false for log file - int64_t blockSize; - int64_t fileSize; - Version endVersion; // not meaningful for range files + bool isRange{ false }; // false for log file + int64_t blockSize{ 0 }; + int64_t fileSize{ 0 }; + Version endVersion{ ::invalidVersion }; // not meaningful for range files Tuple pack() const { return Tuple() diff --git a/fdbclient/SpecialKeySpace.actor.cpp b/fdbclient/SpecialKeySpace.actor.cpp index bea850dea5f..cd10cd93049 100644 --- a/fdbclient/SpecialKeySpace.actor.cpp +++ b/fdbclient/SpecialKeySpace.actor.cpp @@ -1961,7 +1961,7 @@ void parse(StringRef& val, WaitState& w) { } void parse(StringRef& val, time_t& t) { - struct tm tm = { 0 }; + struct tm tm; #ifdef _WIN32 std::istringstream s(val.toString()); s.imbue(std::locale(setlocale(LC_TIME, nullptr))); diff --git a/fdbrpc/FlowTransport.h b/fdbrpc/FlowTransport.h index 78f91b29f2e..abc42e9d707 100644 --- a/fdbrpc/FlowTransport.h +++ b/fdbrpc/FlowTransport.h @@ -39,9 +39,9 @@ class Endpoint { // 
Endpoint represents a particular service (e.g. a serialized Promise or PromiseStream) // An endpoint is either "local" (used for receiving data) or "remote" (used for sending data) constexpr static FileIdentifier file_identifier = 10618805; - typedef UID Token; + using Token = UID; NetworkAddressList addresses; - Token token; + Token token{}; Endpoint() {} Endpoint(const NetworkAddressList& addresses, Token token) : addresses(addresses), token(token) { diff --git a/fdbserver/VersionedBTree.actor.cpp b/fdbserver/VersionedBTree.actor.cpp index bec413db23b..8f011f5a996 100644 --- a/fdbserver/VersionedBTree.actor.cpp +++ b/fdbserver/VersionedBTree.actor.cpp @@ -3873,8 +3873,8 @@ struct SplitStringRef { struct const_iterator { const uint8_t* ptr; - const uint8_t* end; - const uint8_t* next; + const uint8_t* end{ nullptr }; + const uint8_t* next{ nullptr }; inline bool operator==(const const_iterator& rhs) const { return ptr == rhs.ptr; } inline bool operator!=(const const_iterator& rhs) const { return !(*this == rhs); } diff --git a/flow/network.h b/flow/network.h index 8af923b197c..60a190c0afd 100644 --- a/flow/network.h +++ b/flow/network.h @@ -283,7 +283,7 @@ struct hash { struct NetworkAddressList { NetworkAddress address; - Optional secondaryAddress; + Optional secondaryAddress{}; bool operator==(NetworkAddressList const& r) const { return address == r.address && secondaryAddress == r.secondaryAddress; From d0c9cf4fb0e39a969d0878be5f9c0029feeb72de Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Thu, 28 Oct 2021 14:16:14 -0700 Subject: [PATCH 27/69] Enable mismatched-tags clang warning --- cmake/ConfigureCompiler.cmake | 1 - fdbclient/ActorLineageProfiler.h | 2 +- fdbclient/ConfigTransactionInterface.h | 10 +++++----- fdbrpc/FlowTransport.h | 2 +- fdbserver/DataDistribution.actor.cpp | 2 +- fdbserver/DeltaTree.h | 5 +++-- flow/Trace.h | 4 ++-- flow/flow.h | 3 ++- flow/network.h | 2 +- flow/serialize.h | 3 ++- flow/singleton.h | 2 +- 11 files changed, 19 
insertions(+), 17 deletions(-) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index 61d800c30ca..9343ee402fd 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -285,7 +285,6 @@ else() -Wno-comment -Wno-delete-non-virtual-dtor -Wno-format - -Wno-mismatched-tags -Wno-sign-compare -Wno-tautological-pointer-compare -Wno-undefined-var-template diff --git a/fdbclient/ActorLineageProfiler.h b/fdbclient/ActorLineageProfiler.h index a55d1541e13..07b7c309663 100644 --- a/fdbclient/ActorLineageProfiler.h +++ b/fdbclient/ActorLineageProfiler.h @@ -96,7 +96,7 @@ struct ConfigError { class ProfilerConfigT { private: // private types using Lock = std::unique_lock; - friend class crossbow::create_static; + friend struct crossbow::create_static; private: // members std::shared_ptr ingestor = std::make_shared(); diff --git a/fdbclient/ConfigTransactionInterface.h b/fdbclient/ConfigTransactionInterface.h index 6e71b72457b..c6f2aa920fc 100644 --- a/fdbclient/ConfigTransactionInterface.h +++ b/fdbclient/ConfigTransactionInterface.h @@ -188,11 +188,11 @@ struct ConfigTransactionInterface { public: static constexpr FileIdentifier file_identifier = 982485; - struct RequestStream getGeneration; - struct RequestStream get; - struct RequestStream getClasses; - struct RequestStream getKnobs; - struct RequestStream commit; + class RequestStream getGeneration; + class RequestStream get; + class RequestStream getClasses; + class RequestStream getKnobs; + class RequestStream commit; ConfigTransactionInterface(); void setupWellKnownEndpoints(); diff --git a/fdbrpc/FlowTransport.h b/fdbrpc/FlowTransport.h index abc42e9d707..24daae400a9 100644 --- a/fdbrpc/FlowTransport.h +++ b/fdbrpc/FlowTransport.h @@ -134,7 +134,7 @@ class NetworkMessageReceiver { } }; -struct TransportData; +class TransportData; struct Peer : public ReferenceCounted { TransportData* transport; diff --git a/fdbserver/DataDistribution.actor.cpp 
b/fdbserver/DataDistribution.actor.cpp index 7249934267e..21697dedc1d 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -47,7 +47,7 @@ #include "flow/serialize.h" class TCTeamInfo; -struct TCMachineInfo; +class TCMachineInfo; class TCMachineTeamInfo; namespace { diff --git a/fdbserver/DeltaTree.h b/fdbserver/DeltaTree.h index 2c1f7ea91fc..4a2f77c52f9 100644 --- a/fdbserver/DeltaTree.h +++ b/fdbserver/DeltaTree.h @@ -349,7 +349,7 @@ struct DeltaTree { } }; - struct Cursor; + class Cursor; // A Mirror is an accessor for a DeltaTree which allows insertion and reading. Both operations are done // using cursors which point to and share nodes in an tree that is built on-demand and mirrors the compressed @@ -515,7 +515,8 @@ struct DeltaTree { // Cursor provides a way to seek into a DeltaTree and iterate over its contents // All Cursors from a Mirror share the same decoded node 'cache' (tree of DecodedNodes) - struct Cursor { + class Cursor { + public: Cursor() : mirror(nullptr), node(nullptr) {} Cursor(Mirror* r) : mirror(r), node(mirror->root) {} diff --git a/flow/Trace.h b/flow/Trace.h index aeaabb4373e..52dc94aab76 100644 --- a/flow/Trace.h +++ b/flow/Trace.h @@ -588,8 +588,8 @@ void removeTraceRole(std::string const& role); void retrieveTraceLogIssues(std::set& out); void setTraceLogGroup(const std::string& role); template -struct Future; -struct Void; +class Future; +class Void; Future pingTraceLogWriterThread(); enum trace_clock_t { TRACE_CLOCK_NOW, TRACE_CLOCK_REALTIME }; diff --git a/flow/flow.h b/flow/flow.h index 366ac011759..b331107ee54 100644 --- a/flow/flow.h +++ b/flow/flow.h @@ -445,7 +445,8 @@ struct LineageProperties : LineagePropertiesBase { } }; -struct ActorLineage : ThreadSafeReferenceCounted { +class ActorLineage : public ThreadSafeReferenceCounted { +public: friend class LineageReference; struct Property { diff --git a/flow/network.h b/flow/network.h index 60a190c0afd..a0710f5ce4c 100644 --- 
a/flow/network.h +++ b/flow/network.h @@ -407,7 +407,7 @@ class IEventFD : public ReferenceCounted { }; // forward declare SendBuffer, declared in serialize.h -struct SendBuffer; +class SendBuffer; class IConnection { public: diff --git a/flow/serialize.h b/flow/serialize.h index 07f70b1f242..ac48fa9740f 100644 --- a/flow/serialize.h +++ b/flow/serialize.h @@ -851,7 +851,8 @@ struct ISerializeSource { }; template -struct MakeSerializeSource : ISerializeSource { +class MakeSerializeSource : public ISerializeSource { +public: using value_type = V; void serializePacketWriter(PacketWriter& w) const override { ObjectWriter writer([&](size_t size) { return w.writeBytes(size); }, AssumeVersion(w.protocolVersion())); diff --git a/flow/singleton.h b/flow/singleton.h index b5656046747..7d193e8dde0 100644 --- a/flow/singleton.h +++ b/flow/singleton.h @@ -272,4 +272,4 @@ typename singleton::pointer singleton::instance_ = nullp template M singleton::mutex_; -} // namespace crossbow \ No newline at end of file +} // namespace crossbow From 8a69aa08a2b8045ee60c8444261d03f951cbb81f Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Thu, 28 Oct 2021 14:25:37 -0700 Subject: [PATCH 28/69] Enable tautological-pointer-compare clang warning --- cmake/ConfigureCompiler.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index 9343ee402fd..fe055612a08 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -286,7 +286,6 @@ else() -Wno-delete-non-virtual-dtor -Wno-format -Wno-sign-compare - -Wno-tautological-pointer-compare -Wno-undefined-var-template -Wno-unknown-pragmas -Wno-unknown-warning-option From c7b28abaf0abfe6575980095d1eef45331b77c45 Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Thu, 28 Oct 2021 16:53:37 -0700 Subject: [PATCH 29/69] Enable unknown-pragmas warning for clang --- cmake/ConfigureCompiler.cmake | 1 - fdbclient/VersionedMap.h | 2 ++ flow/actorcompiler.h | 2 ++ 
flow/flow.h | 2 ++ flow/genericactors.actor.h | 3 +++ flow/serialize.h | 2 ++ 6 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index fe055612a08..f85cc84f75a 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -287,7 +287,6 @@ else() -Wno-format -Wno-sign-compare -Wno-undefined-var-template - -Wno-unknown-pragmas -Wno-unknown-warning-option -Wno-unused-parameter ) diff --git a/fdbclient/VersionedMap.h b/fdbclient/VersionedMap.h index 32371689a2a..1cd78a18285 100644 --- a/fdbclient/VersionedMap.h +++ b/fdbclient/VersionedMap.h @@ -38,7 +38,9 @@ // PTree also supports efficient finger searches. namespace PTreeImpl { +#ifdef _MSC_VER #pragma warning(disable : 4800) +#endif template struct PTree : public ReferenceCounted>, FastAllocated>, NonCopyable { diff --git a/flow/actorcompiler.h b/flow/actorcompiler.h index a20faf408d9..e783900e45d 100644 --- a/flow/actorcompiler.h +++ b/flow/actorcompiler.h @@ -70,4 +70,6 @@ T waitNext(const FutureStream&); #define THIS_ADDR uintptr_t(nullptr) #endif +#ifdef _MSC_VER #pragma warning(disable : 4355) // 'this' : used in base member initializer list +#endif diff --git a/flow/flow.h b/flow/flow.h index b331107ee54..a3d0e5d7a72 100644 --- a/flow/flow.h +++ b/flow/flow.h @@ -24,10 +24,12 @@ #include "flow/FastRef.h" #pragma once +#ifdef _MSC_VER #pragma warning(disable : 4244 4267) // SOMEDAY: Carefully check for integer overflow issues (e.g. size_t to int // conversions like this suppresses) #pragma warning(disable : 4345) #pragma warning(error : 4239) +#endif #include #include diff --git a/flow/genericactors.actor.h b/flow/genericactors.actor.h index 583648ff070..73df375c397 100644 --- a/flow/genericactors.actor.h +++ b/flow/genericactors.actor.h @@ -37,7 +37,10 @@ #include "flow/Util.h" #include "flow/IndexedSet.h" #include "flow/actorcompiler.h" // This must be the last #include. 
+ +#ifdef _MSC_VER #pragma warning(disable : 4355) // 'this' : used in base member initializer list +#endif ACTOR template Future traceAfter(Future what, const char* type, const char* key, X value, bool traceErrors = false) { diff --git a/flow/serialize.h b/flow/serialize.h index ac48fa9740f..9e0abbdab16 100644 --- a/flow/serialize.h +++ b/flow/serialize.h @@ -268,7 +268,9 @@ inline void load(Archive& ar, std::map& value) { ASSERT(ar.protocolVersion().isValid()); } +#ifdef _MSC_VER #pragma intrinsic(memcpy) +#endif #if VALGRIND static bool valgrindCheck(const void* data, int bytes, const char* context) { From 27db99a77f28ee6231b13b4fb2e0473a4d85755a Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Thu, 28 Oct 2021 22:03:09 -0700 Subject: [PATCH 30/69] Enable clang comment warnings --- cmake/ConfigureCompiler.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index f85cc84f75a..0890dcf0d12 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -282,7 +282,6 @@ else() -Woverloaded-virtual -Wshift-sign-overflow # Here's the current set of warnings we need to explicitly disable to compile warning-free with clang 11 - -Wno-comment -Wno-delete-non-virtual-dtor -Wno-format -Wno-sign-compare From 7f09bdbda450cbfdb8e4db0d61a5888347e1efb9 Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Thu, 28 Oct 2021 22:34:20 -0700 Subject: [PATCH 31/69] Remove -Wclass-memaccess for clang --- cmake/ConfigureCompiler.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ConfigureCompiler.cmake b/cmake/ConfigureCompiler.cmake index 0890dcf0d12..160c46f5aef 100644 --- a/cmake/ConfigureCompiler.cmake +++ b/cmake/ConfigureCompiler.cmake @@ -313,7 +313,7 @@ else() -fvisibility=hidden -Wreturn-type -fPIC) - if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^x86") + if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^x86" AND NOT CLANG) add_compile_options($<$:-Wclass-memaccess>) 
endif() if (GPERFTOOLS_FOUND AND GCC) From af51de902f1482a5c39d9133d90979f04f381fd9 Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Wed, 22 Sep 2021 12:55:37 -0700 Subject: [PATCH 32/69] Add documation about network options. --- .../sphinx/source/api-common.rst.inc | 45 ++++++++++++++++++ documentation/sphinx/source/api-python.rst | 46 +++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/documentation/sphinx/source/api-common.rst.inc b/documentation/sphinx/source/api-common.rst.inc index f70e16a5d6b..df2b819f79c 100644 --- a/documentation/sphinx/source/api-common.rst.inc +++ b/documentation/sphinx/source/api-common.rst.inc @@ -588,3 +588,48 @@ .. |locality-get-addresses-for-key-blurb| replace:: Returns a list of public network addresses as strings, one for each of the storage servers responsible for storing ``key`` and its associated value. + +.. |option-knob| replace:: + + Sets internal tuning or debugging knobs. + +.. |option-tls-verify-peers| replace:: + + Sets the peer certificate field verification criteria. + +.. |option-tls-ca-bytes| replace:: + + Sets the certificate authority bundle. + +.. |option-tls-ca-path| replace:: + + Sets the file from which to load the certificate authority bundle. + +.. |option-tls-password| replace:: + + Sets the passphrase for encrypted private key. Password should be set before setting the key for the password to be used. + +.. |option-disable-multi-version-client-api| replace:: + + Disables the multi-version client API and instead uses the local client directly. Must be set before setting up the network. + +.. |option-set-disable-local-client| replace:: + + Prevents connections through the local client, allowing only connections through externally loaded client libraries. + +.. |option-set-client-threads-per-version| replace:: + + Spawns multiple worker threads for each version of the client that is loaded. Setting this to a number greater than one implies disable_local_client. + +.. 
|option-disable-client-statistics-logging| replace:: + + Disables logging of client statistics, such as sampled transaction activity. + +.. |option-enable-run-loop-profiling| replace:: + + Enables debugging feature to perform run loop profiling. Requires trace logging to be enabled. WARNING: this feature is not recommended for use in production. + + +.. |option-set-distributed-client-tracer| replace:: + + Sets a tracer to run on the client. Should be set to the same value as the tracer set on the server. \ No newline at end of file diff --git a/documentation/sphinx/source/api-python.rst b/documentation/sphinx/source/api-python.rst index 2897be0153e..300f1d1d639 100644 --- a/documentation/sphinx/source/api-python.rst +++ b/documentation/sphinx/source/api-python.rst @@ -125,6 +125,10 @@ After importing the ``fdb`` module and selecting an API version, you probably wa .. note:: |network-options-warning| + .. method :: fdb.options.set_knob("knob_name=value") + + |option-knob| + .. method :: fdb.options.set_trace_enable( output_directory=None ) |option-trace-enable-blurb| @@ -188,6 +192,48 @@ After importing the ``fdb`` module and selecting an API version, you probably wa .. method :: fdb.options.set_tls_key_bytes(bytes) |option-tls-key-bytes| + + .. method :: fdb.options.set_tls_verify_peers(verification_pattern) + + |option-tls-verify-peers| + + .. method :: fdb.options.set_tls_ca_bytes(ca_bundle) + + |option-tls-ca-bytes| + + .. method :: fdb.options.set_tls_ca_path(path) + + |option-tls-ca-path| + + .. method :: fdb.options.set_tls_password(password) + + |option-tls-password| + + .. method :: fdb.options.set_disable_multi_version_client_api() + + |option-disable-multi-version-client-api| + + .. method :: fdb.options.set_disable_local_client() + + |option-set-disable-local-client| + + .. method :: fdb.options.set_ client_threads_per_version(number) + + |option-set-client-threads-per-version| + + .. 
method :: fdb.options.set_disable_client_statistics_logging() + + |option-disable-client-statistics-logging| + + .. method :: fdb.options.set_enable_run_loop_profiling() + + |option-enable-run-loop-profiling| + + .. method :: fdb.options.set_distributed_client_tracer(tracer_type) + + |option-set-distributed-client-tracer| + + Please refer to fdboptions.py (generated) for a comprehensive list of options. .. _api-python-keys: From 90b231e96ea1037ece4f7b7d84d4162b2803c460 Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Fri, 1 Oct 2021 11:05:15 -0700 Subject: [PATCH 33/69] Add link to client knobs. --- documentation/sphinx/source/api-common.rst.inc | 2 +- documentation/sphinx/source/api-python.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/documentation/sphinx/source/api-common.rst.inc b/documentation/sphinx/source/api-common.rst.inc index df2b819f79c..2cba7718dd7 100644 --- a/documentation/sphinx/source/api-common.rst.inc +++ b/documentation/sphinx/source/api-common.rst.inc @@ -591,7 +591,7 @@ .. |option-knob| replace:: - Sets internal tuning or debugging knobs. + Sets internal tuning or debugging knobs. Available knobs could be found at https://github.com/apple/foundationdb/blob/master/fdbclient/ClientKnobs.h. .. |option-tls-verify-peers| replace:: diff --git a/documentation/sphinx/source/api-python.rst b/documentation/sphinx/source/api-python.rst index 300f1d1d639..7f2b9c4af00 100644 --- a/documentation/sphinx/source/api-python.rst +++ b/documentation/sphinx/source/api-python.rst @@ -217,7 +217,7 @@ After importing the ``fdb`` module and selecting an API version, you probably wa |option-set-disable-local-client| - .. method :: fdb.options.set_ client_threads_per_version(number) + .. 
method :: fdb.options.set_client_threads_per_version(number) |option-set-client-threads-per-version| From 648bd336b5bf36a0423978f528ac95ffbc8448aa Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Mon, 1 Nov 2021 14:32:24 -0700 Subject: [PATCH 34/69] resolve comments --- documentation/sphinx/source/api-common.rst.inc | 2 +- documentation/sphinx/source/api-python.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/documentation/sphinx/source/api-common.rst.inc b/documentation/sphinx/source/api-common.rst.inc index 2cba7718dd7..70378d524c8 100644 --- a/documentation/sphinx/source/api-common.rst.inc +++ b/documentation/sphinx/source/api-common.rst.inc @@ -591,7 +591,7 @@ .. |option-knob| replace:: - Sets internal tuning or debugging knobs. Available knobs could be found at https://github.com/apple/foundationdb/blob/master/fdbclient/ClientKnobs.h. + Sets internal tuning or debugging knobs. The argument to this function should be a string representing the knob name and the value, e.g. "transaction_size_limit=1000". .. |option-tls-verify-peers| replace:: diff --git a/documentation/sphinx/source/api-python.rst b/documentation/sphinx/source/api-python.rst index 7f2b9c4af00..18fbd01adb2 100644 --- a/documentation/sphinx/source/api-python.rst +++ b/documentation/sphinx/source/api-python.rst @@ -125,7 +125,7 @@ After importing the ``fdb`` module and selecting an API version, you probably wa .. note:: |network-options-warning| - .. method :: fdb.options.set_knob("knob_name=value") + .. 
method :: fdb.options.set_knob(knob) |option-knob| From e08721c7f4c8cac9caa03e7931f1de69d69e19be Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Sun, 31 Oct 2021 15:49:54 -0700 Subject: [PATCH 35/69] Added flow/serialize/Downgrade unit tests --- flow/serialize.cpp | 89 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) diff --git a/flow/serialize.cpp b/flow/serialize.cpp index 7b1e03b262e..a4570721b9d 100644 --- a/flow/serialize.cpp +++ b/flow/serialize.cpp @@ -18,8 +18,9 @@ * limitations under the License. */ -#include "flow/serialize.h" #include "flow/network.h" +#include "flow/serialize.h" +#include "flow/UnitTest.h" _AssumeVersion::_AssumeVersion(ProtocolVersion version) : v(version) { if (!version.isValid()) { @@ -38,3 +39,89 @@ const void* BinaryReader::readBytes(int bytes) { begin = e; return b; } + +namespace { + +auto const oldKey = "oldKey"_sr; +auto const newKey = "newKey"_sr; + +struct _Struct { + static constexpr FileIdentifier file_identifier = 2340487; + int oldField{ 0 }; +}; + +struct OldStruct : public _Struct { + void setFields() { oldField = 1; } + bool isSet() const { return oldField == 1; } + + template + void serialize(Archive& ar) { + serializer(ar, oldField); + } +}; + +struct NewStruct : public _Struct { + int newField{ 0 }; + + bool isSet() const { return oldField == 1 && newField == 2; } + void setFields() { + oldField = 1; + newField = 2; + } + + template + void serialize(Archive& ar) { + serializer(ar, oldField, newField); + } +}; + +void verifyData(StringRef value, int numObjects) { + { + // use BinaryReader + BinaryReader reader(value, IncludeVersion()); + std::vector data; + reader >> data; + ASSERT_EQ(data.size(), numObjects); + for (const auto& object : data) { + ASSERT(object.isSet()); + } + } + { + // use ArenaReader + ArenaReader reader(Arena(), value, IncludeVersion()); + std::vector data; + reader >> data; + ASSERT_EQ(data.size(), numObjects); + for (const auto& oldObject 
: data) { + ASSERT(oldObject.isSet()); + } + } +} + +} // namespace + +TEST_CASE("flow/serialize/Downgrade/WriteOld") { + BinaryWriter writer(IncludeVersion(g_network->protocolVersion())); + auto const numObjects = deterministicRandom()->randomInt(1, 101); + std::vector data(numObjects); + for (auto& oldObject : data) { + oldObject.setFields(); + } + writer << data; + verifyData(writer.toValue(), numObjects); + return Void(); +} + +TEST_CASE("flow/serialize/Downgrade/WriteNew") { + auto protocolVersion = g_network->protocolVersion(); + protocolVersion.addObjectSerializerFlag(); + ObjectWriter writer(IncludeVersion(protocolVersion)); + auto const numObjects = deterministicRandom()->randomInt(1, 101); + std::vector data(numObjects); + for (auto& newObject : data) { + newObject.setFields(); + } + writer.serialize(data); + verifyData(writer.toStringRef(), numObjects); + return Void(); +} From 45cff017c242cc85eef3c95226558c32b1a165af Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Sun, 31 Oct 2021 15:50:48 -0700 Subject: [PATCH 36/69] Remove Downgrade workload --- fdbserver/CMakeLists.txt | 1 - fdbserver/workloads/Downgrade.actor.cpp | 168 ------------------------ 2 files changed, 169 deletions(-) delete mode 100644 fdbserver/workloads/Downgrade.actor.cpp diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index f9fa0aad46c..ca7d7d6db5b 100644 --- a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -182,7 +182,6 @@ set(FDBSERVER_SRCS workloads/DDMetricsExclude.actor.cpp workloads/DiskDurability.actor.cpp workloads/DiskDurabilityTest.actor.cpp - workloads/Downgrade.actor.cpp workloads/DummyWorkload.actor.cpp workloads/ExternalWorkload.actor.cpp workloads/FastTriggeredWatches.actor.cpp diff --git a/fdbserver/workloads/Downgrade.actor.cpp b/fdbserver/workloads/Downgrade.actor.cpp deleted file mode 100644 index e5157bcf8d3..00000000000 --- a/fdbserver/workloads/Downgrade.actor.cpp +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Downgrade.actor.cpp - 
* - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2020 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "fdbclient/NativeAPI.actor.h" -#include "fdbserver/TesterInterface.actor.h" -#include "fdbserver/workloads/workloads.actor.h" -#include "flow/serialize.h" -#include "flow/actorcompiler.h" // This must be the last #include. - -struct DowngradeWorkload : TestWorkload { - - static constexpr const char* NAME = "Downgrade"; - Key oldKey, newKey; - int numObjects; - - DowngradeWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { - oldKey = getOption(options, LiteralStringRef("oldKey"), LiteralStringRef("oldKey")); - newKey = getOption(options, LiteralStringRef("newKey"), LiteralStringRef("newKey")); - numObjects = getOption(options, LiteralStringRef("numOptions"), deterministicRandom()->randomInt(0, 100)); - } - - struct _Struct { - static constexpr FileIdentifier file_identifier = 2340487; - int oldField = 0; - }; - - struct OldStruct : public _Struct { - void setFields() { oldField = 1; } - bool isSet() const { return oldField == 1; } - - template - void serialize(Archive& ar) { - serializer(ar, oldField); - } - }; - - struct NewStruct : public _Struct { - int newField = 0; - - bool isSet() const { return oldField == 1 && newField == 2; } - void setFields() { - oldField = 1; - newField = 2; - } - - template - void serialize(Archive& ar) { - 
serializer(ar, oldField, newField); - } - }; - - ACTOR static Future writeOld(Database cx, int numObjects, Key key) { - BinaryWriter writer(IncludeVersion(g_network->protocolVersion())); - std::vector data(numObjects); - for (auto& oldObject : data) { - oldObject.setFields(); - } - writer << data; - state Value value = writer.toValue(); - - state Transaction tr(cx); - loop { - try { - tr.set(key, value); - wait(tr.commit()); - return Void(); - } catch (Error& e) { - wait(tr.onError(e)); - } - } - } - - ACTOR static Future writeNew(Database cx, int numObjects, Key key) { - ProtocolVersion protocolVersion = g_network->protocolVersion(); - protocolVersion.addObjectSerializerFlag(); - ObjectWriter writer(IncludeVersion(protocolVersion)); - std::vector data(numObjects); - for (auto& newObject : data) { - newObject.setFields(); - } - writer.serialize(data); - state Value value = writer.toStringRef(); - - state Transaction tr(cx); - loop { - try { - tr.set(key, value); - wait(tr.commit()); - return Void(); - } catch (Error& e) { - wait(tr.onError(e)); - } - } - } - - ACTOR static Future readData(Database cx, int numObjects, Key key) { - state Transaction tr(cx); - state Value value; - - loop { - try { - Optional _value = wait(tr.get(key)); - ASSERT(_value.present()); - value = _value.get(); - break; - } catch (Error& e) { - wait(tr.onError(e)); - } - } - - { - // use BinaryReader - BinaryReader reader(value, IncludeVersion()); - std::vector data; - reader >> data; - ASSERT(data.size() == numObjects); - for (const auto& oldObject : data) { - ASSERT(oldObject.isSet()); - } - } - { - // use ArenaReader - ArenaReader reader(Arena(), value, IncludeVersion()); - std::vector data; - reader >> data; - ASSERT(data.size() == numObjects); - for (const auto& oldObject : data) { - ASSERT(oldObject.isSet()); - } - } - return Void(); - } - - std::string description() const override { return NAME; } - - Future setup(Database const& cx) override { - return clientId ? 
Void() : (writeOld(cx, numObjects, oldKey) && writeNew(cx, numObjects, newKey)); - } - - Future start(Database const& cx) override { - return clientId ? Void() : (readData(cx, numObjects, oldKey) && readData(cx, numObjects, newKey)); - } - - Future check(Database const& cx) override { - // Failures are checked with assertions - return true; - } - void getMetrics(std::vector& m) override {} -}; - -WorkloadFactory DowngradeWorkloadFactory(DowngradeWorkload::NAME); From 2e3f3ea2afd5e09a43de1e29ab2b84689bfa1f7b Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Sun, 31 Oct 2021 15:52:39 -0700 Subject: [PATCH 37/69] Remove unused constants from serialize.cpp --- flow/serialize.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/flow/serialize.cpp b/flow/serialize.cpp index a4570721b9d..7316b995a2d 100644 --- a/flow/serialize.cpp +++ b/flow/serialize.cpp @@ -42,9 +42,6 @@ const void* BinaryReader::readBytes(int bytes) { namespace { -auto const oldKey = "oldKey"_sr; -auto const newKey = "newKey"_sr; - struct _Struct { static constexpr FileIdentifier file_identifier = 2340487; int oldField{ 0 }; From cf3c9dd5201051a95f3a2176ece105a6f15fa8fb Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Sun, 31 Oct 2021 21:00:18 -0700 Subject: [PATCH 38/69] Remove reference to deleted Downgrade.toml file --- tests/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 56d12d77a57..22c77e091d9 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -181,7 +181,6 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES rare/ConflictRangeRYOWCheck.toml) add_fdb_test(TEST_FILES rare/CycleRollbackClogged.toml) add_fdb_test(TEST_FILES rare/CycleWithKills.toml) - add_fdb_test(TEST_FILES rare/Downgrade.toml) add_fdb_test(TEST_FILES rare/FuzzTest.toml) add_fdb_test(TEST_FILES rare/InventoryTestHeavyWrites.toml) add_fdb_test(TEST_FILES rare/LargeApiCorrectness.toml) From 70b5ee35b941b90e4e32681dc5f652cd23a2b92e Mon Sep 17 
00:00:00 2001 From: Trevor Clinkenbeard Date: Mon, 1 Nov 2021 13:48:38 -0700 Subject: [PATCH 39/69] Add comment to flow/serialize/Downgrade/WriteNew unit test Co-authored-by: Andrew Noyes --- flow/serialize.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/flow/serialize.cpp b/flow/serialize.cpp index 7316b995a2d..04d8b460cd0 100644 --- a/flow/serialize.cpp +++ b/flow/serialize.cpp @@ -109,6 +109,7 @@ TEST_CASE("flow/serialize/Downgrade/WriteOld") { return Void(); } +// Verify that old code will still be able to read the values of the struct it knows about, even if we add a new field and write a message with new code. TEST_CASE("flow/serialize/Downgrade/WriteNew") { auto protocolVersion = g_network->protocolVersion(); protocolVersion.addObjectSerializerFlag(); From 841e6b211be3e914a59d4adc70b083b84531812d Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Mon, 1 Nov 2021 14:01:29 -0700 Subject: [PATCH 40/69] Run clang-format on flow/serialize.cpp --- flow/serialize.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flow/serialize.cpp b/flow/serialize.cpp index 04d8b460cd0..a39461d3ddd 100644 --- a/flow/serialize.cpp +++ b/flow/serialize.cpp @@ -109,7 +109,8 @@ TEST_CASE("flow/serialize/Downgrade/WriteOld") { return Void(); } -// Verify that old code will still be able to read the values of the struct it knows about, even if we add a new field and write a message with new code. +// Verify that old code will still be able to read the values of the struct it knows about, even if we add a new field +// and write a message with new code. 
TEST_CASE("flow/serialize/Downgrade/WriteNew") { auto protocolVersion = g_network->protocolVersion(); protocolVersion.addObjectSerializerFlag(); From 3a6a9bdba57a62b733bda4654f7c9d34d2a607b0 Mon Sep 17 00:00:00 2001 From: Yao Xiao <87789492+yao-xiao-github@users.noreply.github.com> Date: Mon, 1 Nov 2021 16:07:36 -0700 Subject: [PATCH 41/69] Update documentation/sphinx/source/api-common.rst.inc apply fix Co-authored-by: A.J. Beamon --- documentation/sphinx/source/api-common.rst.inc | 1 - 1 file changed, 1 deletion(-) diff --git a/documentation/sphinx/source/api-common.rst.inc b/documentation/sphinx/source/api-common.rst.inc index 70378d524c8..b98b7c54615 100644 --- a/documentation/sphinx/source/api-common.rst.inc +++ b/documentation/sphinx/source/api-common.rst.inc @@ -629,7 +629,6 @@ Enables debugging feature to perform run loop profiling. Requires trace logging to be enabled. WARNING: this feature is not recommended for use in production. - .. |option-set-distributed-client-tracer| replace:: Sets a tracer to run on the client. Should be set to the same value as the tracer set on the server. \ No newline at end of file From 7ce29dd153af94f85d3d9618985b5c99f4b9876a Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Mon, 1 Nov 2021 16:26:37 -0700 Subject: [PATCH 42/69] fix CI error --- documentation/sphinx/source/api-common.rst.inc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/documentation/sphinx/source/api-common.rst.inc b/documentation/sphinx/source/api-common.rst.inc index b98b7c54615..704e2244964 100644 --- a/documentation/sphinx/source/api-common.rst.inc +++ b/documentation/sphinx/source/api-common.rst.inc @@ -609,10 +609,6 @@ Sets the passphrase for encrypted private key. Password should be set before setting the key for the password to be used. -.. |option-disable-multi-version-client-api| replace:: - - Disables the multi-version client API and instead uses the local client directly. Must be set before setting up the network. - .. 
|option-set-disable-local-client| replace::

     Prevents connections through the local client, allowing only connections through externally loaded client libraries.

From f6dc54ebbe3d8f9c494884c97083e730b6b5c8ab Mon Sep 17 00:00:00 2001
From: Jingyu Zhou 
Date: Thu, 28 Oct 2021 16:40:02 -0700
Subject: [PATCH 43/69] Add transaction state store documentation

With code pointers.
---
 design/transaction-state-store.md | 99 +++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 design/transaction-state-store.md

diff --git a/design/transaction-state-store.md b/design/transaction-state-store.md
new file mode 100644
index 00000000000..7ee8d2faa77
--- /dev/null
+++ b/design/transaction-state-store.md
@@ -0,0 +1,99 @@
+# Transaction State Store (txnStateStore)
+
+This document describes the transaction state store (often referred to as `txnStateStore` in the code) in FDB. The transaction state store keeps important metadata about the database to bootstrap the database, to guide the transaction system to persist writes (i.e., help assign storage tags to mutations at commit proxies), and to manage data (i.e., shard) movement metadata. This is a critical piece of information that has to be consistent across many processes and to be persistent for recovery.
+
+Acknowledgment: A lot of content is taken from [Evan's FDB brownbag talk](https://drive.google.com/file/d/15UvKiNc-jSFfDGygNmLQP_d4b14X3DAS/).
+
+## What information is stored in transaction state store?
+
+The information includes: shard mapping (key range to storage server mapping, i.e.,
+`keyServers`), storage server tags (`serverTags`), tagLocalityList, storage server tag
+history, database locked flag, metadata version, mustContainSystemMutations, coordinators,
+storage server interface (`serverList`), database configurations, TSS mappings and
+quarantines, backup apply mutation ranges and log ranges, etc.
+
+The information of transaction state store is kept in the system key space, i.e., using the
+`\xff` prefix. Note all data in the system key space are saved on storage servers. The
+`txnStateStore` is only a part of the `\xff` key space, and is additionally kept in the
+memory of commit proxies as well as disks of the log system (i.e., TLogs). Changes to
+the `txnStateStore` are special mutations to the `\xff` key space, and are called
+inconsistently in the code base as "metadata mutations" in commit proxies and
+"state mutations" in Resolvers.
+
+## Why do we need transaction state store?
+
+When bootstrapping an FDB cluster, the new master (i.e., the sequencer) role recruits a
+new transaction system and initializes them. In particular, the transaction state store
+is first read by the master from previous generation's log system, and then broadcast to
+all commit proxies of the new transaction system. After initializing `txnStateStore`, these
+commit proxies know how to assign mutations with storage server tags: `txnStateStore`
+contains the shard map from key range to storage servers; commit proxies use the shard
+map to find and attach the destination storage tags for each mutation.
+
+## How is transaction state store replicated?
+
+The `txnStateStore` is replicated in all commit proxies' memories. It is very important
+that `txnStateStore` data are consistent, otherwise, a shard change issued by one commit
+proxy could result in a situation where different proxies think they should send a
+mutation to different storage servers, thus causing data corruption.
+
+FDB solves this problem by state machine replication: all commit proxies start with the
+same `txnStateStore` data (from master broadcast), and apply the same sequence of mutations.
+Because commits happen at all proxies, it is difficult to maintain the same order as well
+as minimize the communication among them. 
Fortunately, every transaction has to send a +conflict resolution request to all Resolvers and they process transactions in strict order +of commit versions. Leveraging this mechanism, each commit proxy sends all metadata +(i.e., system key) mutations to all Resolvers. Resolvers keep these mutations in memory +and forward to other commit proxies in separate resolution response. Each commit proxy +receive resolution response, along with metadata mutations happend at other proxies before +its commit version, and apply all these metadata mutations in the commit order. +Finally, this proxy only writes metadata mutations in its own transaction batch to TLogs, +i.e., do not write other proxies' metadata mutations to TLogs to avoid repeated writes. +Notably `\xff\x02` prefix is used for backup data and is *NOT* metadata mutations. + +## How is transaction state store persisted? + +When a commit proxy writes metadata mutations to the log system, the proxy assigns a +"txs" tag to the mutation. Depending on FDB versions, the "txs" tag can be one special +tag `txsTag{ tagLocalitySpecial, 1 }` for `TLogVersion::V3` (FDB 6.1) or a randomized +"txs" tag for `TLogVersion::V4` (FDB 6.2 and later) and larger. The idea of randomized +"txs" tag is to spread metadata mutations to all TLogs for faster parallel recovery of +`txnStateStore`. + +At TLogs, all mutation data are indexed by tags. "txs" tag data is special, since it is +only peeked by the master during the transaction system recovery. +See [TLog Spilling doc](tlog-spilling.md.html) for more detailed discussion on the +topic of spilling "txs" data. In short, `txsTag` is spilled by value. +"txs" tag data is indexed and stored in both primary TLogs and satellite TLogs. +Note satellite TLogs only index log router tags and "txs" tags. + +## How is transaction state store implemented? 
+ +`txnStateStore` is kept in memory at commit proxies using `KeyValueStoreMemory`, which +uses `LogSystemDiskQueueAdapter` to be durable with the log system. As a result, reading +from `txnStateStore` never blocks, which means the futures returned by read calls should +always be ready. Writes to `txnStateStore` are first buffered by the `LogSystemDiskQueueAdapter` +in memory. After a commit proxy pushes transaction data to the log system and the data +becomes durable, the proxy clears the buffered data in `LogSystemDiskQueueAdapter`. + +* Master reads `txnStateStore` from old log system: https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/masterserver.actor.cpp#L928-L931 + +* Master broadcasts `txnStateStore` to commit proxies: https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/masterserver.actor.cpp#L940-L968 + +* Commit proxies receive txnStateStore broadcast and builds the `keyInfo` map: https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/CommitProxyServer.actor.cpp#L1886-L1927 + * Look up `keyInfo` map for `GetKeyServerLocationsRequest`: https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/CommitProxyServer.actor.cpp#L1464 + * Look up `keyInfo` map for assign mutations with storage tags: https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/CommitProxyServer.actor.cpp#L926 and https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/CommitProxyServer.actor.cpp#L965-L1010 + +* Commit proxies recover database lock flag and metadata version: https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/CommitProxyServer.actor.cpp#L1942-L1944 + +* Commit proxies add metadata mutations to Resolver request: 
https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/CommitProxyServer.actor.cpp#L137-L140 + +* Resolvers keep these mutations in memory: https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/Resolver.actor.cpp#L220-L230 + +* Resolvers copy metadata mutations to resolution reply message: https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/Resolver.actor.cpp#L244-L249 + +* Commit proxies apply all metadata mutations (including those from other proxies) in the commit order: https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/CommitProxyServer.actor.cpp#L740-L770 + +* Commit proxies only write metadata mutations in its own transaction batch to TLogs: https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/CommitProxyServer.actor.cpp#L772-L774 adds mutations to `storeCommits`. Later in `postResolution()`, https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/CommitProxyServer.actor.cpp#L1162-L1176, only the last one in `storeCommits` are send to TLogs. 
+ +* Commit proxies clear the buffered data in `LogSystemDiskQueueAdapter` after TLog push: https://github.com/apple/foundationdb/blob/6281e647784e74dccb3a6cb88efb9d8b9cccd376/fdbserver/CommitProxyServer.actor.cpp#L1283-L1287 From c913f89227c81fa2fe7f558da84ad863fd12e849 Mon Sep 17 00:00:00 2001 From: Jingyu Zhou Date: Tue, 2 Nov 2021 11:54:58 -0700 Subject: [PATCH 44/69] Address Markus's comments --- design/transaction-state-store.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/design/transaction-state-store.md b/design/transaction-state-store.md index 7ee8d2faa77..0f09a38e52c 100644 --- a/design/transaction-state-store.md +++ b/design/transaction-state-store.md @@ -49,9 +49,21 @@ receive resolution response, along with metadata mutations happend at other prox its commit version, and apply all these metadata mutations in the commit order. Finally, this proxy only writes metadata mutations in its own transaction batch to TLogs, i.e., do not write other proxies' metadata mutations to TLogs to avoid repeated writes. -Notably `\xff\x02` prefix is used for backup data and is *NOT* metadata mutations. -## How is transaction state store persisted? +It's worth calling out that everything in the `txnStateStore` is stored at some storage +servers and a client (e.g., `fdbcli`) can read from these storage servers. During the +commit process, commit proxies parse all mutations in a batch of transactions, and apply +changes (i.e., metadata mutations) to its in-memory copy of `txnStateStore`. Later, the +same changes are applied at storage servers for persistence. Additionally, the process +to store `txnStateStore` at log system is described below. + +Notably `applyMetadataMutations()` is the function that commit proxies use to make changes +to `txnStateStore`. The key ranges stored in `txnStateStore` include `[\xff, \xff\x02)` and +`[\xff\x03, \xff\xff)`, but not everything in these ranges. 
There is no data in the range +of `[\xff\x02, \xff\x03)` belong to `txnStateStore`, e.g., `\xff\x02` prefix is used for +backup data and is *NOT* metadata mutations. + +## How is transaction state store persisted at log system? When a commit proxy writes metadata mutations to the log system, the proxy assigns a "txs" tag to the mutation. Depending on FDB versions, the "txs" tag can be one special From 84854761cb2cd8546d6a60911626853d3868ea1e Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Tue, 2 Nov 2021 21:47:31 -0700 Subject: [PATCH 45/69] Change Redwood to use xxhash for checksums. --- fdbserver/IPager.h | 6 +++--- fdbserver/VersionedBTree.actor.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fdbserver/IPager.h b/fdbserver/IPager.h index 39209d9b271..1346279da99 100644 --- a/fdbserver/IPager.h +++ b/fdbserver/IPager.h @@ -26,7 +26,7 @@ #include "flow/flow.h" #include "fdbclient/FDBTypes.h" -#include "flow/crc32c.h" +#include "flow/xxhash.h" #ifndef VALGRIND #define VALGRIND_MAKE_MEM_UNDEFINED(x, y) @@ -101,7 +101,7 @@ class ArenaPage : public ReferenceCounted, public FastAllocated, public FastAllocated Date: Tue, 2 Nov 2021 21:48:37 -0700 Subject: [PATCH 46/69] Remove unused variable and more clearly explain out of range annotation in Redwood debug output. 
--- fdbserver/VersionedBTree.actor.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fdbserver/VersionedBTree.actor.cpp b/fdbserver/VersionedBTree.actor.cpp index 8b83cecdc93..7c79052dd30 100644 --- a/fdbserver/VersionedBTree.actor.cpp +++ b/fdbserver/VersionedBTree.actor.cpp @@ -4447,15 +4447,17 @@ struct BTreePage { c.moveFirst(); ASSERT(c.valid()); - bool anyOutOfRange = false; do { r += " "; r += c.get().toString(height == 1); + // Out of range entries are annotated but can actually be valid, as they can be the result of + // subtree deletion followed by incremental insertions of records in the deleted range being added + // to an adjacent subtree which is logically expanded encompass the deleted range but still is using + // the original subtree boundaries as DeltaTree2 boundaries. bool tooLow = c.get().withoutValue() < lowerBound.withoutValue(); bool tooHigh = c.get().withoutValue() >= upperBound.withoutValue(); if (tooLow || tooHigh) { - anyOutOfRange = true; if (tooLow) { r += " (below decode lower bound)"; } @@ -4466,12 +4468,6 @@ struct BTreePage { r += "\n"; } while (c.moveNext()); - - // Out of range entries are actually okay now and the result of subtree deletion followed by - // incremental insertions of records in the deleted range being added to an adjacent subtree - // which is logically expanded encompass the deleted range but still is using the original - // subtree boundaries as DeltaTree2 boundaries. - // ASSERT(!anyOutOfRange); } } catch (Error& e) { debug_printf("BTreePage::toString ERROR: %s\n", e.what()); From 313a3dcd630822d3da81dd1562dde1174fa5f313 Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Tue, 2 Nov 2021 23:16:03 -0700 Subject: [PATCH 47/69] Change FlowTransport to use 64 bit XXHash instead of CRC32. 
--- fdbrpc/FlowTransport.actor.cpp | 55 ++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/fdbrpc/FlowTransport.actor.cpp b/fdbrpc/FlowTransport.actor.cpp index d67877835bb..f6aacda5db0 100644 --- a/fdbrpc/FlowTransport.actor.cpp +++ b/fdbrpc/FlowTransport.actor.cpp @@ -27,7 +27,6 @@ #include #endif -#include "flow/crc32c.h" #include "fdbrpc/fdbrpc.h" #include "fdbrpc/FailureMonitor.h" #include "fdbrpc/HealthMonitor.h" @@ -41,6 +40,7 @@ #include "flow/ObjectSerializer.h" #include "flow/ProtocolVersion.h" #include "flow/UnitTest.h" +#include "flow/xxhash.h" #include "flow/actorcompiler.h" // This must be the last #include. static NetworkAddressList g_currentDeliveryPeerAddress = NetworkAddressList(); @@ -984,21 +984,22 @@ static void scanPackets(TransportData* transport, const bool checksumEnabled = !peerAddress.isTLS(); loop { - uint32_t packetLen, packetChecksum; + uint32_t packetLen; + XXH64_hash_t packetChecksum; - // Retrieve packet length and checksum + // Read packet length if size is sufficient or stop + if (e - p < PACKET_LEN_WIDTH) + break; + packetLen = *(uint32_t*)p; + p += PACKET_LEN_WIDTH; + + // Read checksum if present if (checksumEnabled) { - if (e - p < sizeof(uint32_t) * 2) - break; - packetLen = *(uint32_t*)p; - p += PACKET_LEN_WIDTH; - packetChecksum = *(uint32_t*)p; - p += sizeof(uint32_t); - } else { - if (e - p < sizeof(uint32_t)) + // Read checksum if size is sufficient or stop + if (e - p < sizeof(packetChecksum)) break; - packetLen = *(uint32_t*)p; - p += PACKET_LEN_WIDTH; + packetChecksum = *(XXH64_hash_t*)p; + p += sizeof(packetChecksum); } if (packetLen > FLOW_KNOBS->PACKET_LIMIT) { @@ -1036,23 +1037,23 @@ static void scanPackets(TransportData* transport, } } - uint32_t calculatedChecksum = crc32c_append(0, p, packetLen); + XXH64_hash_t calculatedChecksum = XXH3_64bits(p, packetLen); if (calculatedChecksum != packetChecksum) { if (isBuggifyEnabled) { TraceEvent(SevInfo, 
"ChecksumMismatchExp") - .detail("PacketChecksum", (int)packetChecksum) - .detail("CalculatedChecksum", (int)calculatedChecksum); + .detail("PacketChecksum", packetChecksum) + .detail("CalculatedChecksum", calculatedChecksum); } else { TraceEvent(SevWarnAlways, "ChecksumMismatchUnexp") - .detail("PacketChecksum", (int)packetChecksum) - .detail("CalculatedChecksum", (int)calculatedChecksum); + .detail("PacketChecksum", packetChecksum) + .detail("CalculatedChecksum", calculatedChecksum); } throw checksum_failed(); } else { if (isBuggifyEnabled) { TraceEvent(SevError, "ChecksumMatchUnexp") - .detail("PacketChecksum", (int)packetChecksum) - .detail("CalculatedChecksum", (int)calculatedChecksum); + .detail("PacketChecksum", packetChecksum) + .detail("CalculatedChecksum", calculatedChecksum); } } } @@ -1584,10 +1585,15 @@ static ReliablePacket* sendPacket(TransportData* self, // Reserve some space for packet length and checksum, write them after serializing data SplitBuffer packetInfoBuffer; - uint32_t len, checksum = 0; + uint32_t len; + XXH64_hash_t checksum = 0; + XXH3_state_t* checksumState = nullptr; + int packetInfoSize = PACKET_LEN_WIDTH; if (checksumEnabled) { packetInfoSize += sizeof(checksum); + checksumState = XXH3_createState(); + XXH3_64bits_reset(checksumState); } wr.writeAhead(packetInfoSize, &packetInfoBuffer); @@ -1609,11 +1615,16 @@ static ReliablePacket* sendPacket(TransportData* self, while (checksumUnprocessedLength > 0) { uint32_t processLength = std::min(checksumUnprocessedLength, (uint32_t)(checksumPb->bytes_written - prevBytesWritten)); - checksum = crc32c_append(checksum, checksumPb->data() + prevBytesWritten, processLength); + // This won't fail if inputs are non null + XXH3_64bits_update(checksumState, checksumPb->data() + prevBytesWritten, processLength); checksumUnprocessedLength -= processLength; checksumPb = checksumPb->nextPacketBuffer(); prevBytesWritten = 0; } + + checksum = XXH3_64bits_digest(checksumState); + // This always returns OK 
+ XXH3_freeState(checksumState); } // Write packet length and checksum into packet buffer From f3d62ce0029f551292f6ac43c78caeec3d4ce45d Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Tue, 2 Nov 2021 23:40:53 -0700 Subject: [PATCH 48/69] Assert success on XXHash stream functions. --- fdbrpc/FlowTransport.actor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fdbrpc/FlowTransport.actor.cpp b/fdbrpc/FlowTransport.actor.cpp index f6aacda5db0..501ed025e5f 100644 --- a/fdbrpc/FlowTransport.actor.cpp +++ b/fdbrpc/FlowTransport.actor.cpp @@ -1593,7 +1593,7 @@ static ReliablePacket* sendPacket(TransportData* self, if (checksumEnabled) { packetInfoSize += sizeof(checksum); checksumState = XXH3_createState(); - XXH3_64bits_reset(checksumState); + ASSERT(XXH3_64bits_reset(checksumState) == XXH_OK); } wr.writeAhead(packetInfoSize, &packetInfoBuffer); @@ -1616,7 +1616,7 @@ static ReliablePacket* sendPacket(TransportData* self, uint32_t processLength = std::min(checksumUnprocessedLength, (uint32_t)(checksumPb->bytes_written - prevBytesWritten)); // This won't fail if inputs are non null - XXH3_64bits_update(checksumState, checksumPb->data() + prevBytesWritten, processLength); + ASSERT(XXH3_64bits_update(checksumState, checksumPb->data() + prevBytesWritten, processLength) == XXH_OK); checksumUnprocessedLength -= processLength; checksumPb = checksumPb->nextPacketBuffer(); prevBytesWritten = 0; @@ -1624,7 +1624,7 @@ static ReliablePacket* sendPacket(TransportData* self, checksum = XXH3_64bits_digest(checksumState); // This always returns OK - XXH3_freeState(checksumState); + ASSERT(XXH3_freeState(checksumState) == XXH_OK); } // Write packet length and checksum into packet buffer From dc3f46c2aeb3c98501da32cd622c35b16bfaeb68 Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Wed, 3 Nov 2021 11:00:12 -0700 Subject: [PATCH 49/69] Change asserts to throw internal_error as these function calls should never be compiled out. 
--- fdbrpc/FlowTransport.actor.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fdbrpc/FlowTransport.actor.cpp b/fdbrpc/FlowTransport.actor.cpp index 501ed025e5f..4f5d5e2d1b8 100644 --- a/fdbrpc/FlowTransport.actor.cpp +++ b/fdbrpc/FlowTransport.actor.cpp @@ -1593,7 +1593,9 @@ static ReliablePacket* sendPacket(TransportData* self, if (checksumEnabled) { packetInfoSize += sizeof(checksum); checksumState = XXH3_createState(); - ASSERT(XXH3_64bits_reset(checksumState) == XXH_OK); + if (XXH3_64bits_reset(checksumState) != XXH_OK) { + throw internal_error(); + } } wr.writeAhead(packetInfoSize, &packetInfoBuffer); @@ -1616,7 +1618,9 @@ static ReliablePacket* sendPacket(TransportData* self, uint32_t processLength = std::min(checksumUnprocessedLength, (uint32_t)(checksumPb->bytes_written - prevBytesWritten)); // This won't fail if inputs are non null - ASSERT(XXH3_64bits_update(checksumState, checksumPb->data() + prevBytesWritten, processLength) == XXH_OK); + if (XXH3_64bits_update(checksumState, checksumPb->data() + prevBytesWritten, processLength) != XXH_OK) { + throw internal_error(); + } checksumUnprocessedLength -= processLength; checksumPb = checksumPb->nextPacketBuffer(); prevBytesWritten = 0; @@ -1624,7 +1628,9 @@ static ReliablePacket* sendPacket(TransportData* self, checksum = XXH3_64bits_digest(checksumState); // This always returns OK - ASSERT(XXH3_freeState(checksumState) == XXH_OK); + if (XXH3_freeState(checksumState) != XXH_OK) { + throw internal_error(); + } } // Write packet length and checksum into packet buffer From 0853661d13f45aa54a26e85cff63808de58a3213 Mon Sep 17 00:00:00 2001 From: Tao Lin Date: Wed, 3 Nov 2021 12:44:30 -0700 Subject: [PATCH 50/69] Introduce getRangeAndHop to push computations down to FDB --- bindings/c/fdb_c.cpp | 93 +++- bindings/c/foundationdb/fdb_c.h | 18 + bindings/c/test/unit/fdb_api.cpp | 35 ++ bindings/c/test/unit/fdb_api.hpp | 18 + bindings/c/test/unit/unit_tests.cpp | 162 +++++- 
bindings/java/fdbJNI.cpp | 69 +++ .../RangeAndHopQueryIntegrationTest.java | 252 +++++++++ .../foundationdb/FakeFDBTransaction.java | 7 +- .../apple/foundationdb/FDBTransaction.java | 43 +- .../com/apple/foundationdb/RangeQuery.java | 26 +- .../apple/foundationdb/ReadTransaction.java | 36 ++ bindings/java/src/tests.cmake | 1 + .../release-notes/release-notes-700.rst | 3 + fdbclient/DatabaseContext.h | 1 + fdbclient/IClientApi.h | 6 + fdbclient/ISingleThreadTransaction.h | 6 + fdbclient/MultiVersionTransaction.actor.cpp | 48 ++ fdbclient/MultiVersionTransaction.h | 29 + fdbclient/NativeAPI.actor.cpp | 216 +++++--- fdbclient/NativeAPI.actor.h | 17 + fdbclient/PaxosConfigTransaction.h | 8 + fdbclient/ReadYourWrites.actor.cpp | 122 ++++ fdbclient/ReadYourWrites.h | 6 + fdbclient/ServerKnobs.cpp | 2 + fdbclient/ServerKnobs.h | 2 + fdbclient/SimpleConfigTransaction.h | 8 + fdbclient/StorageServerInterface.cpp | 45 ++ fdbclient/StorageServerInterface.h | 46 ++ fdbclient/ThreadSafeTransaction.cpp | 17 + fdbclient/ThreadSafeTransaction.h | 6 + fdbrpc/TSSComparison.h | 5 +- fdbserver/CMakeLists.txt | 1 + fdbserver/storageserver.actor.cpp | 523 +++++++++++++++++- fdbserver/worker.actor.cpp | 5 + .../workloads/IndexPrefetchDemo.actor.cpp | 144 +++++ flow/Platform.h | 1 + flow/error_definitions.h | 6 + tests/CMakeLists.txt | 1 + tests/fast/IndexPrefetchDemo.toml | 6 + 39 files changed, 1909 insertions(+), 131 deletions(-) create mode 100644 bindings/java/src/integration/com/apple/foundationdb/RangeAndHopQueryIntegrationTest.java create mode 100644 fdbserver/workloads/IndexPrefetchDemo.actor.cpp create mode 100644 tests/fast/IndexPrefetchDemo.toml diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index ecb78e4df7a..f7a00ad4906 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -436,21 +436,12 @@ extern "C" DLLEXPORT FDBFuture* fdb_transaction_get_addresses_for_key(FDBTransac return (FDBFuture*)(TXN(tr)->getAddressesForKey(KeyRef(key_name, 
key_name_length)).extractPtr()); } -FDBFuture* fdb_transaction_get_range_impl(FDBTransaction* tr, - uint8_t const* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - uint8_t const* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - int limit, - int target_bytes, +// Set to the actual limit, target_bytes, and reverse. +FDBFuture* validate_and_update_parameters(int& limit, + int& target_bytes, FDBStreamingMode mode, int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse) { + fdb_bool_t& reverse) { /* This method may be called with a runtime API version of 13, in which negative row limits are a reverse range read */ if (g_api_version <= 13 && limit < 0) { @@ -500,6 +491,27 @@ FDBFuture* fdb_transaction_get_range_impl(FDBTransaction* tr, else if (mode_bytes != GetRangeLimits::BYTE_LIMIT_UNLIMITED) target_bytes = std::min(target_bytes, mode_bytes); + return nullptr; +} + +FDBFuture* fdb_transaction_get_range_impl(FDBTransaction* tr, + uint8_t const* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + uint8_t const* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + int limit, + int target_bytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse) { + FDBFuture* r = validate_and_update_parameters(limit, target_bytes, mode, iteration, reverse); + if (r != nullptr) + return r; return ( FDBFuture*)(TXN(tr) ->getRange( @@ -511,6 +523,60 @@ FDBFuture* fdb_transaction_get_range_impl(FDBTransaction* tr, .extractPtr()); } +FDBFuture* fdb_transaction_get_range_and_hop_impl(FDBTransaction* tr, + uint8_t const* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + uint8_t const* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + uint8_t const* hop_info_name, + int hop_info_name_length, + int limit, + int 
target_bytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse) { + FDBFuture* r = validate_and_update_parameters(limit, target_bytes, mode, iteration, reverse); + if (r != nullptr) + return r; + return ( + FDBFuture*)(TXN(tr) + ->getRangeAndHop( + KeySelectorRef(KeyRef(begin_key_name, begin_key_name_length), begin_or_equal, begin_offset), + KeySelectorRef(KeyRef(end_key_name, end_key_name_length), end_or_equal, end_offset), + StringRef(hop_info_name, hop_info_name_length), + GetRangeLimits(limit, target_bytes), + snapshot, + reverse) + .extractPtr()); +} + +// TODO: Support FDB_API_ADDED in generate_asm.py and then this can be replaced with fdb_api_ptr_unimpl. +FDBFuture* fdb_transaction_get_range_and_hop_v699(FDBTransaction* tr, + uint8_t const* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + uint8_t const* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + uint8_t const* hop_info_name, + int hop_info_name_length, + int limit, + int target_bytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse) { + fprintf(stderr, "UNIMPLEMENTED FDB API FUNCTION\n"); + abort(); +} + FDBFuture* fdb_transaction_get_range_selector_v13(FDBTransaction* tr, uint8_t const* begin_key_name, int begin_key_name_length, @@ -702,6 +768,7 @@ extern "C" DLLEXPORT fdb_error_t fdb_select_api_version_impl(int runtime_version // WARNING: use caution when implementing removed functions by calling public API functions. This can lead to // undesired behavior when using the multi-version API. Instead, it is better to have both the removed and public // functions call an internal implementation function. See fdb_create_database_impl for an example. 
+ FDB_API_CHANGED(fdb_transaction_get_range_and_hop, 700); FDB_API_REMOVED(fdb_future_get_version, 620); FDB_API_REMOVED(fdb_create_cluster, 610); FDB_API_REMOVED(fdb_cluster_create_database, 610); diff --git a/bindings/c/foundationdb/fdb_c.h b/bindings/c/foundationdb/fdb_c.h index 81bf10d8a8e..5329fbd7d04 100644 --- a/bindings/c/foundationdb/fdb_c.h +++ b/bindings/c/foundationdb/fdb_c.h @@ -244,6 +244,24 @@ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_range(FDBTransaction fdb_bool_t reverse); #endif +DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_range_and_hop(FDBTransaction* tr, + uint8_t const* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + uint8_t const* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + uint8_t const* hop_info_name, + int hop_info_name_length, + int limit, + int target_bytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse); + DLLEXPORT void fdb_transaction_set(FDBTransaction* tr, uint8_t const* key_name, int key_name_length, diff --git a/bindings/c/test/unit/fdb_api.cpp b/bindings/c/test/unit/fdb_api.cpp index e59085eeb97..c628d618b14 100644 --- a/bindings/c/test/unit/fdb_api.cpp +++ b/bindings/c/test/unit/fdb_api.cpp @@ -193,6 +193,41 @@ KeyValueArrayFuture Transaction::get_range(const uint8_t* begin_key_name, reverse)); } +KeyValueArrayFuture Transaction::get_range_and_hop(const uint8_t* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + const uint8_t* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + const uint8_t* hop_info_name, + int hop_info_name_length, + int limit, + int target_bytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse) { + return KeyValueArrayFuture(fdb_transaction_get_range_and_hop(tr_, + begin_key_name, + begin_key_name_length, + begin_or_equal, + 
begin_offset, + end_key_name, + end_key_name_length, + end_or_equal, + end_offset, + hop_info_name, + hop_info_name_length, + limit, + target_bytes, + mode, + iteration, + snapshot, + reverse)); +} + EmptyFuture Transaction::watch(std::string_view key) { return EmptyFuture(fdb_transaction_watch(tr_, (const uint8_t*)key.data(), key.size())); } diff --git a/bindings/c/test/unit/fdb_api.hpp b/bindings/c/test/unit/fdb_api.hpp index 17f25d55ee4..c03b720b8dd 100644 --- a/bindings/c/test/unit/fdb_api.hpp +++ b/bindings/c/test/unit/fdb_api.hpp @@ -219,6 +219,24 @@ class Transaction final { fdb_bool_t snapshot, fdb_bool_t reverse); + // Returns a future which will be set to an FDBKeyValue array. + KeyValueArrayFuture get_range_and_hop(const uint8_t* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + const uint8_t* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + const uint8_t* hop_info_name, + int hop_info_name_length, + int limit, + int target_bytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse); + // Wrapper around fdb_transaction_watch. Returns a future representing an // empty value. EmptyFuture watch(std::string_view key); diff --git a/bindings/c/test/unit/unit_tests.cpp b/bindings/c/test/unit/unit_tests.cpp index fe88e6b96f2..5dc477a0ed3 100644 --- a/bindings/c/test/unit/unit_tests.cpp +++ b/bindings/c/test/unit/unit_tests.cpp @@ -40,6 +40,7 @@ #define DOCTEST_CONFIG_IMPLEMENT #include "doctest.h" #include "fdbclient/rapidjson/document.h" +#include "fdbclient/Tuple.h" #include "flow/config.h" @@ -76,7 +77,7 @@ fdb_error_t wait_future(fdb::Future& f) { // Given a string s, returns the "lowest" string greater than any string that // starts with s. Taken from // https://github.com/apple/foundationdb/blob/e7d72f458c6a985fdfa677ae021f357d6f49945b/flow/flow.cpp#L223. 
-std::string strinc(const std::string& s) { +std::string strinc_str(const std::string& s) { int index = -1; for (index = s.size() - 1; index >= 0; --index) { if ((uint8_t)s[index] != 255) { @@ -92,16 +93,16 @@ std::string strinc(const std::string& s) { return r; } -TEST_CASE("strinc") { - CHECK(strinc("a").compare("b") == 0); - CHECK(strinc("y").compare("z") == 0); - CHECK(strinc("!").compare("\"") == 0); - CHECK(strinc("*").compare("+") == 0); - CHECK(strinc("fdb").compare("fdc") == 0); - CHECK(strinc("foundation database 6").compare("foundation database 7") == 0); +TEST_CASE("strinc_str") { + CHECK(strinc_str("a").compare("b") == 0); + CHECK(strinc_str("y").compare("z") == 0); + CHECK(strinc_str("!").compare("\"") == 0); + CHECK(strinc_str("*").compare("+") == 0); + CHECK(strinc_str("fdb").compare("fdc") == 0); + CHECK(strinc_str("foundation database 6").compare("foundation database 7") == 0); char terminated[] = { 'a', 'b', '\xff' }; - CHECK(strinc(std::string(terminated, 3)).compare("ac") == 0); + CHECK(strinc_str(std::string(terminated, 3)).compare("ac") == 0); } // Helper function to add `prefix` to all keys in the given map. 
Returns a new @@ -117,7 +118,7 @@ std::map create_data(std::map& data) { fdb::Transaction tr(db); - auto end_key = strinc(prefix); + auto end_key = strinc_str(prefix); while (1) { tr.clear_range(prefix, end_key); for (const auto& [key, val] : data) { @@ -224,6 +225,59 @@ GetRangeResult get_range(fdb::Transaction& tr, return GetRangeResult{ results, out_more != 0, 0 }; } +GetRangeResult get_range_and_hop(fdb::Transaction& tr, + const uint8_t* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + const uint8_t* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + const uint8_t* hop_info_name, + int hop_info_name_length, + int limit, + int target_bytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse) { + fdb::KeyValueArrayFuture f1 = tr.get_range_and_hop(begin_key_name, + begin_key_name_length, + begin_or_equal, + begin_offset, + end_key_name, + end_key_name_length, + end_or_equal, + end_offset, + hop_info_name, + hop_info_name_length, + limit, + target_bytes, + mode, + iteration, + snapshot, + reverse); + + fdb_error_t err = wait_future(f1); + if (err) { + return GetRangeResult{ {}, false, err }; + } + + const FDBKeyValue* out_kv; + int out_count; + fdb_bool_t out_more; + fdb_check(f1.get(&out_kv, &out_count, &out_more)); + + std::vector> results; + for (int i = 0; i < out_count; ++i) { + std::string key((const char*)out_kv[i].key, out_kv[i].key_length); + std::string value((const char*)out_kv[i].value, out_kv[i].value_length); + results.emplace_back(key, value); + } + return GetRangeResult{ results, out_more != 0, 0 }; +} + // Clears all data in the database. 
void clear_data(FDBDatabase* db) { insert_data(db, {}); @@ -819,6 +873,86 @@ TEST_CASE("fdb_transaction_set_read_version future_version") { CHECK(err == 1009); // future_version } +const std::string EMPTY = Tuple().pack().toString(); +const KeyRef RECORD = "RECORD"_sr; +const KeyRef INDEX = "INDEX"_sr; +static KeyRef primaryKey(const int i) { + return KeyRef(format("primary-key-of-record-%08d", i)); +} +static KeyRef indexKey(const int i) { + return KeyRef(format("index-key-of-record-%08d", i)); +} +static ValueRef dataOfRecord(const int i) { + return KeyRef(format("data-of-record-%08d", i)); +} +static std::string indexEntryKey(const int i) { + return Tuple().append(prefix).append(INDEX).append(indexKey(i)).append(primaryKey(i)).pack().toString(); +} +static std::string recordKey(const int i) { + return Tuple().append(prefix).append(RECORD).append(primaryKey(i)).pack().toString(); +} +static std::string recordValue(const int i) { + return Tuple().append(dataOfRecord(i)).pack().toString(); +} + +TEST_CASE("fdb_transaction_get_range_and_hop") { + // Note: The user requested `prefix` should be added as the first element of the tuple that forms the key, rather + // than the prefix of the key. So we don't use key() or create_data() in this test. + std::map data; + for (int i = 0; i < 3; i++) { + data[indexEntryKey(i)] = EMPTY; + data[recordKey(i)] = recordValue(i); + } + insert_data(db, data); + + std::string hop_info = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).pack().toString(); + + fdb::Transaction tr(db); + // get_range_and_hop is only support without RYW. This is a must!!! 
+ fdb_check(tr.set_option(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, nullptr, 0)); + while (1) { + auto result = get_range_and_hop( + tr, + // [0, 1] + FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((const uint8_t*)indexEntryKey(0).c_str(), indexEntryKey(0).size()), + FDB_KEYSEL_FIRST_GREATER_THAN((const uint8_t*)indexEntryKey(1).c_str(), indexEntryKey(1).size()), + (const uint8_t*)hop_info.c_str(), + hop_info.size(), + /* limit */ 0, + /* target_bytes */ 0, + /* FDBStreamingMode */ FDB_STREAMING_MODE_WANT_ALL, + /* iteration */ 0, + /* snapshot */ false, + /* reverse */ 0); + + if (result.err) { + fdb::EmptyFuture f1 = tr.on_error(result.err); + fdb_check(wait_future(f1)); + continue; + } + + // Only the first 2 records are supposed to be returned. + if (result.kvs.size() < 2) { + CHECK(result.more); + // Retry. + continue; + } + + CHECK(result.kvs.size() == 2); + CHECK(!result.more); + for (int i = 0; i < 2; i++) { + const auto& [key, value] = result.kvs[i]; + std::cout << "result[" << i << "]: key=" << key << ", value=" << value << std::endl; + // OUTPUT: + // result[0]: key=fdbRECORDprimary-key-of-record-00000000, value=data-of-record-00000000 + // result[1]: key=fdbRECORDprimary-key-of-record-00000001, value=data-of-record-00000001 + CHECK(recordKey(i).compare(key) == 0); + CHECK(recordValue(i).compare(value) == 0); + } + break; + } +} + TEST_CASE("fdb_transaction_get_range reverse") { std::map data = create_data({ { "a", "1" }, { "b", "2" }, { "c", "3" }, { "d", "4" } }); insert_data(db, data); @@ -1726,7 +1860,7 @@ TEST_CASE("fdb_transaction_add_conflict_range") { fdb::Transaction tr2(db); while (1) { - fdb_check(tr2.add_conflict_range(key("a"), strinc(key("a")), FDB_CONFLICT_RANGE_TYPE_WRITE)); + fdb_check(tr2.add_conflict_range(key("a"), strinc_str(key("a")), FDB_CONFLICT_RANGE_TYPE_WRITE)); fdb::EmptyFuture f1 = tr2.commit(); fdb_error_t err = wait_future(f1); @@ -1739,8 +1873,8 @@ TEST_CASE("fdb_transaction_add_conflict_range") { } while (1) { - 
fdb_check(tr.add_conflict_range(key("a"), strinc(key("a")), FDB_CONFLICT_RANGE_TYPE_READ)); - fdb_check(tr.add_conflict_range(key("a"), strinc(key("a")), FDB_CONFLICT_RANGE_TYPE_WRITE)); + fdb_check(tr.add_conflict_range(key("a"), strinc_str(key("a")), FDB_CONFLICT_RANGE_TYPE_READ)); + fdb_check(tr.add_conflict_range(key("a"), strinc_str(key("a")), FDB_CONFLICT_RANGE_TYPE_WRITE)); fdb::EmptyFuture f1 = tr.commit(); fdb_error_t err = wait_future(f1); @@ -2217,7 +2351,7 @@ TEST_CASE("commit_does_not_reset") { continue; } - fdb_check(tr2.add_conflict_range(key("foo"), strinc(key("foo")), FDB_CONFLICT_RANGE_TYPE_READ)); + fdb_check(tr2.add_conflict_range(key("foo"), strinc_str(key("foo")), FDB_CONFLICT_RANGE_TYPE_READ)); tr2.set(key("foo"), "bar"); fdb::EmptyFuture tr2CommitFuture = tr2.commit(); err = wait_future(tr2CommitFuture); diff --git a/bindings/java/fdbJNI.cpp b/bindings/java/fdbJNI.cpp index 587190d3a5e..4caea9d89a7 100644 --- a/bindings/java/fdbJNI.cpp +++ b/bindings/java/fdbJNI.cpp @@ -756,6 +756,75 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1 return (jlong)f; } +JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1getRangeAndHop(JNIEnv* jenv, + jobject, + jlong tPtr, + jbyteArray keyBeginBytes, + jboolean orEqualBegin, + jint offsetBegin, + jbyteArray keyEndBytes, + jboolean orEqualEnd, + jint offsetEnd, + jbyteArray hopInfoBytes, + jint rowLimit, + jint targetBytes, + jint streamingMode, + jint iteration, + jboolean snapshot, + jboolean reverse) { + if (!tPtr || !keyBeginBytes || !keyEndBytes || !hopInfoBytes) { + throwParamNotNull(jenv); + return 0; + } + FDBTransaction* tr = (FDBTransaction*)tPtr; + + uint8_t* barrBegin = (uint8_t*)jenv->GetByteArrayElements(keyBeginBytes, JNI_NULL); + if (!barrBegin) { + if (!jenv->ExceptionOccurred()) + throwRuntimeEx(jenv, "Error getting handle to native resources"); + return 0; + } + + uint8_t* barrEnd = 
(uint8_t*)jenv->GetByteArrayElements(keyEndBytes, JNI_NULL); + if (!barrEnd) { + jenv->ReleaseByteArrayElements(keyBeginBytes, (jbyte*)barrBegin, JNI_ABORT); + if (!jenv->ExceptionOccurred()) + throwRuntimeEx(jenv, "Error getting handle to native resources"); + return 0; + } + + uint8_t* barrHopInfo = (uint8_t*)jenv->GetByteArrayElements(hopInfoBytes, JNI_NULL); + if (!barrHopInfo) { + jenv->ReleaseByteArrayElements(keyBeginBytes, (jbyte*)barrBegin, JNI_ABORT); + jenv->ReleaseByteArrayElements(keyEndBytes, (jbyte*)barrEnd, JNI_ABORT); + if (!jenv->ExceptionOccurred()) + throwRuntimeEx(jenv, "Error getting handle to native resources"); + return 0; + } + + FDBFuture* f = fdb_transaction_get_range_and_hop(tr, + barrBegin, + jenv->GetArrayLength(keyBeginBytes), + orEqualBegin, + offsetBegin, + barrEnd, + jenv->GetArrayLength(keyEndBytes), + orEqualEnd, + offsetEnd, + barrHopInfo, + jenv->GetArrayLength(hopInfoBytes), + rowLimit, + targetBytes, + (FDBStreamingMode)streamingMode, + iteration, + snapshot, + reverse); + jenv->ReleaseByteArrayElements(keyBeginBytes, (jbyte*)barrBegin, JNI_ABORT); + jenv->ReleaseByteArrayElements(keyEndBytes, (jbyte*)barrEnd, JNI_ABORT); + jenv->ReleaseByteArrayElements(hopInfoBytes, (jbyte*)barrHopInfo, JNI_ABORT); + return (jlong)f; +} + JNIEXPORT void JNICALL Java_com_apple_foundationdb_FutureResults_FutureResults_1getDirect(JNIEnv* jenv, jobject, jlong future, diff --git a/bindings/java/src/integration/com/apple/foundationdb/RangeAndHopQueryIntegrationTest.java b/bindings/java/src/integration/com/apple/foundationdb/RangeAndHopQueryIntegrationTest.java new file mode 100644 index 00000000000..2741bcb368d --- /dev/null +++ b/bindings/java/src/integration/com/apple/foundationdb/RangeAndHopQueryIntegrationTest.java @@ -0,0 +1,252 @@ +/* + * RangeAndHopQueryIntegrationTest.java + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2018 Apple Inc. 
and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.apple.foundationdb; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicReference; + +import com.apple.foundationdb.async.AsyncIterable; +import com.apple.foundationdb.async.AsyncUtil; +import com.apple.foundationdb.tuple.ByteArrayUtil; +import com.apple.foundationdb.tuple.Tuple; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; + +@ExtendWith(RequiresDatabase.class) +class RangeAndHopQueryIntegrationTest { + private static final FDB fdb = FDB.selectAPIVersion(710); + public String databaseArg = null; + private Database openFDB() { return fdb.open(databaseArg); } + + @BeforeEach + @AfterEach + void clearDatabase() throws Exception { + /* + * Empty the database before and after each run, just in case + */ + try (Database db = openFDB()) { + db.run(tr -> { + tr.clear(Range.startsWith(new byte[] { (byte)0x00 })); + return null; + }); + } + } + + static private final byte[] EMPTY = Tuple.from().pack(); + static private final 
String PREFIX = "prefix"; + static private final String RECORD = "RECORD"; + static private final String INDEX = "INDEX"; + static private String primaryKey(int i) { return String.format("primary-key-of-record-%08d", i); } + static private String indexKey(int i) { return String.format("index-key-of-record-%08d", i); } + static private String dataOfRecord(int i) { return String.format("data-of-record-%08d", i); } + + static byte[] HOP_INFO = Tuple.from(PREFIX, RECORD, "{K[3]}").pack(); + static private byte[] indexEntryKey(final int i) { + return Tuple.from(PREFIX, INDEX, indexKey(i), primaryKey(i)).pack(); + } + static private byte[] recordKey(final int i) { return Tuple.from(PREFIX, RECORD, primaryKey(i)).pack(); } + static private byte[] recordValue(final int i) { return Tuple.from(dataOfRecord(i)).pack(); } + + static private void insertRecordWithIndex(final Transaction tr, final int i) { + tr.set(indexEntryKey(i), EMPTY); + tr.set(recordKey(i), recordValue(i)); + } + + private static String getArgFromEnv() { + String[] clusterFiles = MultiClientHelper.readClusterFromEnv(); + String cluster = clusterFiles[0]; + System.out.printf("Using Cluster: %s\n", cluster); + return cluster; + } + public static void main(String[] args) throws Exception { + final RangeAndHopQueryIntegrationTest test = new RangeAndHopQueryIntegrationTest(); + test.databaseArg = getArgFromEnv(); + test.clearDatabase(); + test.comparePerformance(); + test.clearDatabase(); + } + + int numRecords = 10000; + int numQueries = 10000; + int numRecordsPerQuery = 100; + boolean validate = false; + @Test + void comparePerformance() { + FDB fdb = FDB.selectAPIVersion(710); + try (Database db = openFDB()) { + insertRecordsWithIndexes(numRecords, db); + instrument(rangeQueryAndGet, "rangeQueryAndGet", db); + instrument(rangeQueryAndHop, "rangeQueryAndHop", db); + } + } + + private void instrument(final RangeQueryWithIndex query, final String name, final Database db) { + System.out.printf("Starting %s 
(numQueries:%d, numRecordsPerQuery:%d)\n", name, numQueries, numRecordsPerQuery); + long startTime = System.currentTimeMillis(); + for (int queryId = 0; queryId < numQueries; queryId++) { + int begin = ThreadLocalRandom.current().nextInt(numRecords - numRecordsPerQuery); + query.run(begin, begin + numRecordsPerQuery, db); + } + long time = System.currentTimeMillis() - startTime; + System.out.printf("Finished %s, it takes %d ms for %d queries (%d qps)\n", name, time, numQueries, + numQueries * 1000L / time); + } + + static private final int RECORDS_PER_TXN = 100; + static private void insertRecordsWithIndexes(int n, Database db) { + int i = 0; + while (i < n) { + int begin = i; + int end = Math.min(n, i + RECORDS_PER_TXN); + // insert [begin, end) in one transaction + db.run(tr -> { + for (int t = begin; t < end; t++) { + insertRecordWithIndex(tr, t); + } + return null; + }); + i = end; + } + } + + public interface RangeQueryWithIndex { + void run(int begin, int end, Database db); + } + + RangeQueryWithIndex rangeQueryAndGet = (int begin, int end, Database db) -> db.run(tr -> { + try { + List kvs = tr.getRange(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)), + KeySelector.firstGreaterOrEqual(indexEntryKey(end)), + ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL) + .asList() + .get(); + Assertions.assertEquals(end - begin, kvs.size()); + + // Get the records of each index entry IN PARALLEL. + List> resultFutures = new ArrayList<>(); + // In reality, we need to get the record key by parsing the index entry key. But considering this is a + // performance test, we just ignore the returned key and simply generate it from recordKey. 
+ for (int id = begin; id < end; id++) { + resultFutures.add(tr.get(recordKey(id))); + } + AsyncUtil.whenAll(resultFutures).get(); + + if (validate) { + final Iterator indexes = kvs.iterator(); + final Iterator> records = resultFutures.iterator(); + for (int id = begin; id < end; id++) { + Assertions.assertTrue(indexes.hasNext()); + assertByteArrayEquals(indexEntryKey(id), indexes.next().getKey()); + Assertions.assertTrue(records.hasNext()); + assertByteArrayEquals(recordValue(id), records.next().get()); + } + Assertions.assertFalse(indexes.hasNext()); + Assertions.assertFalse(records.hasNext()); + } + } catch (Exception e) { + Assertions.fail("Unexpected exception", e); + } + return null; + }); + + RangeQueryWithIndex rangeQueryAndHop = (int begin, int end, Database db) -> db.run(tr -> { + try { + tr.options().setReadYourWritesDisable(); + List kvs = tr.getRangeAndHop(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)), + KeySelector.firstGreaterOrEqual(indexEntryKey(end)), HOP_INFO, + ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL) + .asList() + .get(); + Assertions.assertEquals(end - begin, kvs.size()); + + if (validate) { + final Iterator results = kvs.iterator(); + for (int id = begin; id < end; id++) { + Assertions.assertTrue(results.hasNext()); + assertByteArrayEquals(recordValue(id), results.next().getValue()); + } + Assertions.assertFalse(results.hasNext()); + } + } catch (Exception e) { + Assertions.fail("Unexpected exception", e); + } + return null; + }); + + void assertByteArrayEquals(byte[] expected, byte[] actual) { + Assertions.assertEquals(ByteArrayUtil.printable(expected), ByteArrayUtil.printable(actual)); + } + + @Test + void rangeAndHopQueryOverMultipleRows() throws Exception { + try (Database db = openFDB()) { + insertRecordsWithIndexes(3, db); + + List expected_data_of_records = new ArrayList<>(); + for (int i = 0; i <= 1; i++) { + expected_data_of_records.add(recordValue(i)); + } + + db.run(tr -> { + // getRangeAndHop is 
only support without RYW. This is a must!!! + tr.options().setReadYourWritesDisable(); + + Iterator kvs = + tr.getRangeAndHop(KeySelector.firstGreaterOrEqual(indexEntryKey(0)), + KeySelector.firstGreaterThan(indexEntryKey(1)), HOP_INFO, + ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL) + .iterator(); + Iterator expected_data_of_records_iter = expected_data_of_records.iterator(); + while (expected_data_of_records_iter.hasNext()) { + Assertions.assertTrue(kvs.hasNext(), "iterator ended too early"); + KeyValue kv = kvs.next(); + byte[] actual_data_of_record = kv.getValue(); + byte[] expected_data_of_record = expected_data_of_records_iter.next(); + + // System.out.println("result key:" + ByteArrayUtil.printable(kv.getKey()) + " value:" + + // ByteArrayUtil.printable(kv.getValue())); Output: + // result + // key:\x02prefix\x00\x02INDEX\x00\x02index-key-of-record-0\x00\x02primary-key-of-record-0\x00 + // value:\x02data-of-record-0\x00 + // result + // key:\x02prefix\x00\x02INDEX\x00\x02index-key-of-record-1\x00\x02primary-key-of-record-1\x00 + // value:\x02data-of-record-1\x00 + + // For now, we don't guarantee what that the returned keys mean. 
+ Assertions.assertArrayEquals(expected_data_of_record, actual_data_of_record, + "Incorrect data of record!"); + } + Assertions.assertFalse(kvs.hasNext(), "Iterator returned too much data"); + + return null; + }); + } + } +} diff --git a/bindings/java/src/junit/com/apple/foundationdb/FakeFDBTransaction.java b/bindings/java/src/junit/com/apple/foundationdb/FakeFDBTransaction.java index f154790b2b5..1f5cbce2615 100644 --- a/bindings/java/src/junit/com/apple/foundationdb/FakeFDBTransaction.java +++ b/bindings/java/src/junit/com/apple/foundationdb/FakeFDBTransaction.java @@ -88,8 +88,11 @@ public CompletableFuture get(byte[] key) { public int getNumRangeCalls() { return numRangeCalls; } @Override - protected FutureResults getRange_internal(KeySelector begin, KeySelector end, int rowLimit, int targetBytes, - int streamingMode, int iteration, boolean isSnapshot, boolean reverse) { + protected FutureResults getRange_internal(KeySelector begin, KeySelector end, + // TODO: hop is not supported in FakeFDBTransaction yet. 
+ byte[] hopInfo, // Nullable + int rowLimit, int targetBytes, int streamingMode, int iteration, + boolean isSnapshot, boolean reverse) { numRangeCalls++; // TODO this is probably not correct for all KeySelector instances--we'll want to match with real behavior NavigableMap range = diff --git a/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java b/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java index 05431a0fba1..0503dbc5022 100644 --- a/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java +++ b/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java @@ -91,6 +91,15 @@ public CompletableFuture getRangeSplitPoints(Range range, long c return FDBTransaction.this.getRangeSplitPoints(range, chunkSize); } + @Override + public AsyncIterable getRangeAndHop(KeySelector begin, KeySelector end, byte[] hopInfo, int limit, + boolean reverse, StreamingMode mode) { + if (hopInfo == null) { + throw new IllegalArgumentException("HopInfo must be non-null"); + } + return new RangeQuery(FDBTransaction.this, true, begin, end, hopInfo, limit, reverse, mode, eventKeeper); + } + /////////////////// // getRange -> KeySelectors /////////////////// @@ -338,6 +347,15 @@ public CompletableFuture getRangeSplitPoints(Range range, long c return this.getRangeSplitPoints(range.begin, range.end, chunkSize); } + @Override + public AsyncIterable getRangeAndHop(KeySelector begin, KeySelector end, byte[] hopInfo, int limit, + boolean reverse, StreamingMode mode) { + if (hopInfo == null) { + throw new IllegalArgumentException("HopInfo must be non-null"); + } + return new RangeQuery(this, false, begin, end, hopInfo, limit, reverse, mode, eventKeeper); + } + /////////////////// // getRange -> KeySelectors /////////////////// @@ -415,10 +433,10 @@ public Database getDatabase() { } // Users of this function must close the returned FutureResults when finished - protected FutureResults getRange_internal( - KeySelector begin, KeySelector end, - int 
rowLimit, int targetBytes, int streamingMode, - int iteration, boolean isSnapshot, boolean reverse) { + protected FutureResults getRange_internal(KeySelector begin, KeySelector end, + byte[] hopInfo, // Nullable + int rowLimit, int targetBytes, int streamingMode, int iteration, + boolean isSnapshot, boolean reverse) { if (eventKeeper != null) { eventKeeper.increment(Events.JNI_CALL); } @@ -429,10 +447,14 @@ protected FutureResults getRange_internal( begin.toString(), end.toString(), rowLimit, targetBytes, streamingMode, iteration, Boolean.toString(isSnapshot), Boolean.toString(reverse)));*/ return new FutureResults( - Transaction_getRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(), - end.getKey(), end.orEqual(), end.getOffset(), rowLimit, targetBytes, - streamingMode, iteration, isSnapshot, reverse), - FDB.instance().isDirectBufferQueriesEnabled(), executor, eventKeeper); + hopInfo == null + ? Transaction_getRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(), end.getKey(), + end.orEqual(), end.getOffset(), rowLimit, targetBytes, streamingMode, + iteration, isSnapshot, reverse) + : Transaction_getRangeAndHop(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(), + end.getKey(), end.orEqual(), end.getOffset(), hopInfo, rowLimit, + targetBytes, streamingMode, iteration, isSnapshot, reverse), + FDB.instance().isDirectBufferQueriesEnabled(), executor, eventKeeper); } finally { pointerReadLock.unlock(); } @@ -771,6 +793,11 @@ private native long Transaction_getRange(long cPtr, byte[] keyEnd, boolean orEqualEnd, int offsetEnd, int rowLimit, int targetBytes, int streamingMode, int iteration, boolean isSnapshot, boolean reverse); + private native long Transaction_getRangeAndHop(long cPtr, byte[] keyBegin, boolean orEqualBegin, int offsetBegin, + byte[] keyEnd, boolean orEqualEnd, int offsetEnd, + byte[] hopInfo, // Nonnull + int rowLimit, int targetBytes, int streamingMode, int iteration, + boolean isSnapshot, boolean reverse); 
private native void Transaction_addConflictRange(long cPtr, byte[] keyBegin, byte[] keyEnd, int conflictRangeType); private native void Transaction_set(long cPtr, byte[] key, byte[] value); diff --git a/bindings/java/src/main/com/apple/foundationdb/RangeQuery.java b/bindings/java/src/main/com/apple/foundationdb/RangeQuery.java index d518a0b9db5..469f015d1c6 100644 --- a/bindings/java/src/main/com/apple/foundationdb/RangeQuery.java +++ b/bindings/java/src/main/com/apple/foundationdb/RangeQuery.java @@ -49,17 +49,19 @@ class RangeQuery implements AsyncIterable { private final FDBTransaction tr; private final KeySelector begin; private final KeySelector end; + private final byte[] hopInfo; // Nullable private final boolean snapshot; private final int rowLimit; private final boolean reverse; private final StreamingMode streamingMode; private final EventKeeper eventKeeper; - RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, int rowLimit, - boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) { + RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, byte[] hopInfo, + int rowLimit, boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) { this.tr = transaction; this.begin = begin; this.end = end; + this.hopInfo = hopInfo; this.snapshot = isSnapshot; this.rowLimit = rowLimit; this.reverse = reverse; @@ -67,6 +69,12 @@ class RangeQuery implements AsyncIterable { this.eventKeeper = eventKeeper; } + // RangeQueryAndHop + RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, int rowLimit, + boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) { + this(transaction, isSnapshot, begin, end, null, rowLimit, reverse, streamingMode, eventKeeper); + } + /** * Returns all the results from the range requested as a {@code List}. 
If there were no * limits on the original query and there is a large amount of data in the database @@ -83,17 +91,17 @@ public CompletableFuture> asList() { // if the streaming mode is EXACT, try and grab things as one chunk if(mode == StreamingMode.EXACT) { - FutureResults range = tr.getRange_internal( - this.begin, this.end, this.rowLimit, 0, StreamingMode.EXACT.code(), - 1, this.snapshot, this.reverse); + + FutureResults range = tr.getRange_internal(this.begin, this.end, this.hopInfo, this.rowLimit, 0, + StreamingMode.EXACT.code(), 1, this.snapshot, this.reverse); return range.thenApply(result -> result.get().values) .whenComplete((result, e) -> range.close()); } // If the streaming mode is not EXACT, simply collect the results of an // iteration into a list - return AsyncUtil.collect(new RangeQuery(tr, snapshot, begin, end, rowLimit, reverse, mode, eventKeeper), - tr.getExecutor()); + return AsyncUtil.collect( + new RangeQuery(tr, snapshot, begin, end, hopInfo, rowLimit, reverse, mode, eventKeeper), tr.getExecutor()); } /** @@ -221,8 +229,8 @@ private synchronized void startNextFetch() { nextFuture = new CompletableFuture<>(); final long sTime = System.nanoTime(); - fetchingChunk = tr.getRange_internal(begin, end, rowsLimited ? rowsRemaining : 0, 0, streamingMode.code(), - ++iteration, snapshot, reverse); + fetchingChunk = tr.getRange_internal(begin, end, hopInfo, rowsLimited ? 
rowsRemaining : 0, 0, + streamingMode.code(), ++iteration, snapshot, reverse); BiConsumer cons = new FetchComplete(fetchingChunk,nextFuture); if(eventKeeper!=null){ diff --git a/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java b/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java index 1dabc08c930..ba4b674d5a5 100644 --- a/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java +++ b/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java @@ -424,6 +424,42 @@ AsyncIterable getRange(Range range, AsyncIterable getRange(Range range, int limit, boolean reverse, StreamingMode mode); + /** + * Gets an ordered range of keys and values from the database. The begin + * and end keys are specified by {@code KeySelector}s, with the begin + * {@code KeySelector} inclusive and the end {@code KeySelector} exclusive. + * + * @see KeySelector + * @see AsyncIterator + * + * @param begin the beginning of the range (inclusive) + * @param end the end of the range (exclusive) + * @param hopInfo TODO + * @param limit the maximum number of results to return. Limits results to the + * first keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query + * should not limit the number of results. If {@code reverse} is {@code true} rows + * will be limited starting at the end of the range. + * @param reverse return results starting at the end of the range in reverse order. + * Reading ranges in reverse is supported natively by the database and should + * have minimal extra cost. + * @param mode provide a hint about how the results are to be used. This + * can provide speed improvements or efficiency gains based on the caller's + * knowledge of the upcoming access pattern. + * + *

+ * When converting the result of this query to a list using {@link AsyncIterable#asList()} with the {@code + * ITERATOR} streaming mode, the query is automatically modified to fetch results in larger batches. This is done + * because it is known in advance that the {@link AsyncIterable#asList()} function will fetch all results in the + * range. If a limit is specified, the {@code EXACT} streaming mode will be used, and otherwise it will use {@code + * WANT_ALL}. + * + * To achieve comparable performance when iterating over an entire range without using {@link + * AsyncIterable#asList()}, the same streaming mode would need to be used. + *

+ * @return a handle to access the results of the asynchronous call + */ + AsyncIterable getRangeAndHop(KeySelector begin, KeySelector end, byte[] hopInfo, int limit, + boolean reverse, StreamingMode mode); /** * Gets an estimate for the number of bytes stored in the given range. diff --git a/bindings/java/src/tests.cmake b/bindings/java/src/tests.cmake index 3e9dce6657f..ae00c389943 100644 --- a/bindings/java/src/tests.cmake +++ b/bindings/java/src/tests.cmake @@ -52,6 +52,7 @@ set(JAVA_INTEGRATION_TESTS src/integration/com/apple/foundationdb/CycleMultiClientIntegrationTest.java src/integration/com/apple/foundationdb/SidebandMultiThreadClientTest.java src/integration/com/apple/foundationdb/RepeatableReadMultiThreadClientTest.java + src/integration/com/apple/foundationdb/RangeAndHopQueryIntegrationTest.java ) # Resources that are used in integration testing, but are not explicitly test files (JUnit rules, diff --git a/documentation/sphinx/source/release-notes/release-notes-700.rst b/documentation/sphinx/source/release-notes/release-notes-700.rst index 770c4c9af5e..b7a569de3f8 100644 --- a/documentation/sphinx/source/release-notes/release-notes-700.rst +++ b/documentation/sphinx/source/release-notes/release-notes-700.rst @@ -30,6 +30,7 @@ Features * Improved the efficiency with which storage servers replicate data between themselves. `(PR #5017) `_ * Added support to ``exclude command`` to exclude based on locality match. `(PR #5113) `_ * Add the ``trace_partial_file_suffix`` network option. This option will give unfinished trace files a special suffix to indicate they're not complete yet. When the trace file is complete, it is renamed to remove the suffix. `(PR #5328) `_ +* Added "get range and hop" feature with new APIs (see Bindings section). Storage servers are able to generate the keys in the queries based on another query. With this, upper layer can push some computations down to FDB, to improve latency and bandwidth when read. 
`(PR #5609) `_ Performance ----------- @@ -86,6 +87,8 @@ Bindings * C: Added a function, ``fdb_database_create_snapshot``, to create a snapshot of the database. `(PR #4241) `_ * C: Added ``fdb_database_get_main_thread_busyness`` function to report how busy a client's main thread is. `(PR #4504) `_ * Java: Added ``Database.getMainThreadBusyness`` function to report how busy a client's main thread is. `(PR #4564) `_ +* C: Added ``fdb_transaction_get_range_and_hop`` function to support running queries based on another query in one request. `(PR #5609) `_ +* Java: Added ``Transaction.getRangeAndHop`` function to support running queries based on another query in one request. `(PR #5609) `_ Other Changes ------------- diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index 837d4ec7933..6e4a22247d0 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -369,6 +369,7 @@ class DatabaseContext : public ReferenceCounted, public FastAll Counter transactionGetKeyRequests; Counter transactionGetValueRequests; Counter transactionGetRangeRequests; + Counter transactionGetRangeAndHopRequests; Counter transactionGetRangeStreamRequests; Counter transactionWatchRequests; Counter transactionGetAddressesForKeyRequests; diff --git a/fdbclient/IClientApi.h b/fdbclient/IClientApi.h index cf304202bb6..bda30afa708 100644 --- a/fdbclient/IClientApi.h +++ b/fdbclient/IClientApi.h @@ -59,6 +59,12 @@ class ITransaction { GetRangeLimits limits, bool snapshot = false, bool reverse = false) = 0; + virtual ThreadFuture getRangeAndHop(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& hopInfo, + GetRangeLimits limits, + bool snapshot = false, + bool reverse = false) = 0; virtual ThreadFuture>> getAddressesForKey(const KeyRef& key) = 0; virtual ThreadFuture> getVersionstamp() = 0; diff --git a/fdbclient/ISingleThreadTransaction.h b/fdbclient/ISingleThreadTransaction.h index 9228184593f..edd16103d3c 100644 --- 
a/fdbclient/ISingleThreadTransaction.h +++ b/fdbclient/ISingleThreadTransaction.h @@ -63,6 +63,12 @@ class ISingleThreadTransaction : public ReferenceCounted getRangeAndHop(KeySelector begin, + KeySelector end, + Key hopInfo, + GetRangeLimits limits, + Snapshot = Snapshot::False, + Reverse = Reverse::False) = 0; virtual Future>> getAddressesForKey(Key const& key) = 0; virtual Future>> getRangeSplitPoints(KeyRange const& range, int64_t chunkSize) = 0; virtual Future getEstimatedRangeSizeBytes(KeyRange const& keys) = 0; diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp index 9d701439d92..f664cccd4d7 100644 --- a/fdbclient/MultiVersionTransaction.actor.cpp +++ b/fdbclient/MultiVersionTransaction.actor.cpp @@ -141,6 +141,41 @@ ThreadFuture DLTransaction::getRange(const KeyRangeRef& keys, return getRange(firstGreaterOrEqual(keys.begin), firstGreaterOrEqual(keys.end), limits, snapshot, reverse); } +ThreadFuture DLTransaction::getRangeAndHop(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& hopInfo, + GetRangeLimits limits, + bool snapshot, + bool reverse) { + FdbCApi::FDBFuture* f = api->transactionGetRangeAndHop(tr, + begin.getKey().begin(), + begin.getKey().size(), + begin.orEqual, + begin.offset, + end.getKey().begin(), + end.getKey().size(), + end.orEqual, + end.offset, + hopInfo.begin(), + hopInfo.size(), + limits.rows, + limits.bytes, + FDB_STREAMING_MODE_EXACT, + 0, + snapshot, + reverse); + return toThreadFuture(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) { + const FdbCApi::FDBKeyValue* kvs; + int count; + FdbCApi::fdb_bool_t more; + FdbCApi::fdb_error_t error = api->futureGetKeyValueArray(f, &kvs, &count, &more); + ASSERT(!error); + + // The memory for this is stored in the FDBFuture and is released when the future gets destroyed + return RangeResult(RangeResultRef(VectorRef((KeyValueRef*)kvs, count), more), Arena()); + }); +} + ThreadFuture>> 
DLTransaction::getAddressesForKey(const KeyRef& key) { FdbCApi::FDBFuture* f = api->transactionGetAddressesForKey(tr, key.begin(), key.size()); @@ -452,6 +487,7 @@ void DLApi::init() { loadClientFunction(&api->transactionGetKey, lib, fdbCPath, "fdb_transaction_get_key"); loadClientFunction(&api->transactionGetAddressesForKey, lib, fdbCPath, "fdb_transaction_get_addresses_for_key"); loadClientFunction(&api->transactionGetRange, lib, fdbCPath, "fdb_transaction_get_range"); + loadClientFunction(&api->transactionGetRangeAndHop, lib, fdbCPath, "fdb_transaction_get_range_and_hop"); loadClientFunction( &api->transactionGetVersionstamp, lib, fdbCPath, "fdb_transaction_get_versionstamp", headerVersion >= 410); loadClientFunction(&api->transactionSet, lib, fdbCPath, "fdb_transaction_set"); @@ -731,6 +767,18 @@ ThreadFuture MultiVersionTransaction::getRange(const KeyRangeRef& k return abortableFuture(f, tr.onChange); } +ThreadFuture MultiVersionTransaction::getRangeAndHop(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& hopInfo, + GetRangeLimits limits, + bool snapshot, + bool reverse) { + auto tr = getTransaction(); + auto f = tr.transaction ? tr.transaction->getRangeAndHop(begin, end, hopInfo, limits, snapshot, reverse) + : makeTimeout(); + return abortableFuture(f, tr.onChange); +} + ThreadFuture> MultiVersionTransaction::getVersionstamp() { auto tr = getTransaction(); auto f = tr.transaction ? 
tr.transaction->getVersionstamp() : makeTimeout>(); diff --git a/fdbclient/MultiVersionTransaction.h b/fdbclient/MultiVersionTransaction.h index 95d9a8b14cc..e772a7d50ed 100644 --- a/fdbclient/MultiVersionTransaction.h +++ b/fdbclient/MultiVersionTransaction.h @@ -118,6 +118,23 @@ struct FdbCApi : public ThreadSafeReferenceCounted { int iteration, fdb_bool_t snapshot, fdb_bool_t reverse); + FDBFuture* (*transactionGetRangeAndHop)(FDBTransaction* tr, + uint8_t const* beginKeyName, + int beginKeyNameLength, + fdb_bool_t beginOrEqual, + int beginOffset, + uint8_t const* endKeyName, + int endKeyNameLength, + fdb_bool_t endOrEqual, + int endOffset, + uint8_t const* hop_info_name, + int hop_info_name_length, + int limit, + int targetBytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse); FDBFuture* (*transactionGetVersionstamp)(FDBTransaction* tr); void (*transactionSet)(FDBTransaction* tr, @@ -219,6 +236,12 @@ class DLTransaction : public ITransaction, ThreadSafeReferenceCounted getRangeAndHop(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& hopInfo, + GetRangeLimits limits, + bool snapshot, + bool reverse) override; ThreadFuture>> getAddressesForKey(const KeyRef& key) override; ThreadFuture> getVersionstamp() override; ThreadFuture getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override; @@ -360,6 +383,12 @@ class MultiVersionTransaction : public ITransaction, ThreadSafeReferenceCounted< GetRangeLimits limits, bool snapshot = false, bool reverse = false) override; + ThreadFuture getRangeAndHop(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& hopInfo, + GetRangeLimits limits, + bool snapshot, + bool reverse) override; ThreadFuture>> getAddressesForKey(const KeyRef& key) override; ThreadFuture> getVersionstamp() override; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 117524a43a3..c3f542b34ac 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ 
b/fdbclient/NativeAPI.actor.cpp @@ -160,6 +160,8 @@ void DatabaseContext::addTssMapping(StorageServerInterface const& ssi, StorageSe TSSEndpointData(tssi.id(), tssi.getKey.getEndpoint(), metrics)); queueModel.updateTssEndpoint(ssi.getKeyValues.getEndpoint().token.first(), TSSEndpointData(tssi.id(), tssi.getKeyValues.getEndpoint(), metrics)); + queueModel.updateTssEndpoint(ssi.getKeyValuesAndHop.getEndpoint().token.first(), + TSSEndpointData(tssi.id(), tssi.getKeyValuesAndHop.getEndpoint(), metrics)); queueModel.updateTssEndpoint(ssi.getKeyValuesStream.getEndpoint().token.first(), TSSEndpointData(tssi.id(), tssi.getKeyValuesStream.getEndpoint(), metrics)); @@ -183,6 +185,7 @@ void DatabaseContext::removeTssMapping(StorageServerInterface const& ssi) { queueModel.removeTssEndpoint(ssi.getValue.getEndpoint().token.first()); queueModel.removeTssEndpoint(ssi.getKey.getEndpoint().token.first()); queueModel.removeTssEndpoint(ssi.getKeyValues.getEndpoint().token.first()); + queueModel.removeTssEndpoint(ssi.getKeyValuesAndHop.getEndpoint().token.first()); queueModel.removeTssEndpoint(ssi.getKeyValuesStream.getEndpoint().token.first()); queueModel.removeTssEndpoint(ssi.watchValue.getEndpoint().token.first()); @@ -1196,6 +1199,7 @@ DatabaseContext::DatabaseContext(Reference watchValueMap(Future version, return Void(); } -void transformRangeLimits(GetRangeLimits limits, Reverse reverse, GetKeyValuesRequest& req) { +template +void transformRangeLimits(GetRangeLimits limits, Reverse reverse, GetKeyValuesMaybeHopRequest& req) { if (limits.bytes != 0) { if (!limits.hasRowLimit()) req.limit = CLIENT_KNOBS->REPLY_BYTE_LIMIT; // Can't get more than this many rows anyway @@ -3049,26 +3055,47 @@ void transformRangeLimits(GetRangeLimits limits, Reverse reverse, GetKeyValuesRe } } -ACTOR Future getExactRange(Database cx, - Version version, - KeyRange keys, - GetRangeLimits limits, - Reverse reverse, - TransactionInfo info, - TagSet tags) { +template +RequestStream 
StorageServerInterface::*getRangeRequestStream() { + if constexpr (std::is_same::value) { + return &StorageServerInterface::getKeyValues; + } else if (std::is_same::value) { + return &StorageServerInterface::getKeyValuesAndHop; + } else { + UNREACHABLE(); + } +} + +ACTOR template +Future getExactRange(Database cx, + Version version, + KeyRange keys, + Key hopInfo, + GetRangeLimits limits, + Reverse reverse, + TransactionInfo info, + TagSet tags) { state RangeResult output; state Span span("NAPI:getExactRange"_loc, info.spanID); // printf("getExactRange( '%s', '%s' )\n", keys.begin.toString().c_str(), keys.end.toString().c_str()); loop { - state std::vector>> locations = wait(getKeyRangeLocations( - cx, keys, CLIENT_KNOBS->GET_RANGE_SHARD_LIMIT, reverse, &StorageServerInterface::getKeyValues, info)); + state std::vector>> locations = + wait(getKeyRangeLocations(cx, + keys, + CLIENT_KNOBS->GET_RANGE_SHARD_LIMIT, + reverse, + getRangeRequestStream(), + info)); ASSERT(locations.size()); state int shard = 0; loop { const KeyRangeRef& range = locations[shard].first; - GetKeyValuesRequest req; + GetKeyValuesMaybeHopRequest req; + req.hopInfo = hopInfo; + req.arena.dependsOn(hopInfo.arena()); + req.version = version; req.begin = firstGreaterOrEqual(range.begin); req.end = firstGreaterOrEqual(range.end); @@ -3098,14 +3125,14 @@ ACTOR Future getExactRange(Database cx, .detail("Servers", locations[shard].second->description());*/ } ++cx->transactionPhysicalReads; - state GetKeyValuesReply rep; + state GetKeyValuesMaybeHopReply rep; try { choose { when(wait(cx->connectionFileChanged())) { throw transaction_too_old(); } - when(GetKeyValuesReply _rep = + when(GetKeyValuesMaybeHopReply _rep = wait(loadBalance(cx.getPtr(), locations[shard].second, - &StorageServerInterface::getKeyValues, + getRangeRequestStream(), req, TaskPriority::DefaultPromiseEndpoint, AtMostOnce::False, @@ -3155,7 +3182,7 @@ ACTOR Future getExactRange(Database cx, .detail("BlockBytes", 
rep.data.expectedSize()); ASSERT(false); } - TEST(true); // GetKeyValuesReply.more in getExactRange + TEST(true); // GetKeyValuesMaybeHopReply.more in getExactRange // Make next request to the same shard with a beginning key just after the last key returned if (reverse) locations[shard].first = @@ -3231,14 +3258,16 @@ Future resolveKey(Database const& cx, return getKey(cx, key, version, info, tags); } -ACTOR Future getRangeFallback(Database cx, - Version version, - KeySelector begin, - KeySelector end, - GetRangeLimits limits, - Reverse reverse, - TransactionInfo info, - TagSet tags) { +ACTOR template +Future getRangeFallback(Database cx, + Version version, + KeySelector begin, + KeySelector end, + Key hopInfo, + GetRangeLimits limits, + Reverse reverse, + TransactionInfo info, + TagSet tags) { if (version == latestVersion) { state Transaction transaction(cx); transaction.setOption(FDBTransactionOptions::CAUSAL_READ_RISKY); @@ -3261,7 +3290,8 @@ ACTOR Future getRangeFallback(Database cx, // if b is allKeys.begin, we have either read through the beginning of the database, // or allKeys.begin exists in the database and will be part of the conflict range anyways - RangeResult _r = wait(getExactRange(cx, version, KeyRangeRef(b, e), limits, reverse, info, tags)); + RangeResult _r = wait(getExactRange( + cx, version, KeyRangeRef(b, e), hopInfo, limits, reverse, info, tags)); RangeResult r = _r; if (b == allKeys.begin && ((reverse && !r.more) || !reverse)) @@ -3286,6 +3316,7 @@ ACTOR Future getRangeFallback(Database cx, return r; } +// TODO: Client should add hop keys to conflict ranges. 
void getRangeFinished(Database cx, Reference trLogInfo, double startTime, @@ -3340,17 +3371,23 @@ void getRangeFinished(Database cx, } } -ACTOR Future getRange(Database cx, - Reference trLogInfo, - Future fVersion, - KeySelector begin, - KeySelector end, - GetRangeLimits limits, - Promise> conflictRange, - Snapshot snapshot, - Reverse reverse, - TransactionInfo info, - TagSet tags) { +// GetKeyValuesMaybeHopRequest: GetKeyValuesRequest or GetKeyValuesAndHopRequest +// GetKeyValuesMaybeHopReply: GetKeyValuesReply or GetKeyValuesAndHopReply +// Sadly we need GetKeyValuesMaybeHopReply because cannot do something like: state +// REPLY_TYPE(GetKeyValuesMaybeHopRequest) rep; +ACTOR template +Future getRange(Database cx, + Reference trLogInfo, + Future fVersion, + KeySelector begin, + KeySelector end, + Key hopInfo, + GetRangeLimits limits, + Promise> conflictRange, + Snapshot snapshot, + Reverse reverse, + TransactionInfo info, + TagSet tags) { state GetRangeLimits originalLimits(limits); state KeySelector originalBegin = begin; state KeySelector originalEnd = end; @@ -3384,11 +3421,13 @@ ACTOR Future getRange(Database cx, Key locationKey = reverse ? Key(end.getKey(), end.arena()) : Key(begin.getKey(), begin.arena()); Reverse locationBackward{ reverse ? 
(end - 1).isBackward() : begin.isBackward() }; - state std::pair> beginServer = - wait(getKeyLocation(cx, locationKey, &StorageServerInterface::getKeyValues, info, locationBackward)); + state std::pair> beginServer = wait(getKeyLocation( + cx, locationKey, getRangeRequestStream(), info, locationBackward)); state KeyRange shard = beginServer.first; state bool modifiedSelectors = false; - state GetKeyValuesRequest req; + state GetKeyValuesMaybeHopRequest req; + req.hopInfo = hopInfo; + req.arena.dependsOn(hopInfo.arena()); req.isFetchKeys = (info.taskID == TaskPriority::FetchKeys); req.version = readVersion; @@ -3447,17 +3486,17 @@ ACTOR Future getRange(Database cx, } ++cx->transactionPhysicalReads; - state GetKeyValuesReply rep; + state GetKeyValuesMaybeHopReply rep; try { if (CLIENT_BUGGIFY_WITH_PROB(.01)) { throw deterministicRandom()->randomChoice( std::vector{ transaction_too_old(), future_version() }); } // state AnnotateActor annotation(currentLineage); - GetKeyValuesReply _rep = + GetKeyValuesMaybeHopReply _rep = wait(loadBalance(cx.getPtr(), beginServer.second, - &StorageServerInterface::getKeyValues, + getRangeRequestStream(), req, TaskPriority::DefaultPromiseEndpoint, AtMostOnce::False, @@ -3557,11 +3596,12 @@ ACTOR Future getRange(Database cx, if (!rep.more) { ASSERT(modifiedSelectors); - TEST(true); // !GetKeyValuesReply.more and modifiedSelectors in getRange + TEST(true); // !GetKeyValuesMaybeHopReply.more and modifiedSelectors in getRange if (!rep.data.size()) { - RangeResult result = wait(getRangeFallback( - cx, version, originalBegin, originalEnd, originalLimits, reverse, info, tags)); + RangeResult result = + wait(getRangeFallback( + cx, version, originalBegin, originalEnd, hopInfo, originalLimits, reverse, info, tags)); getRangeFinished(cx, trLogInfo, startTime, @@ -3579,7 +3619,7 @@ ACTOR Future getRange(Database cx, else begin = firstGreaterOrEqual(shard.end); } else { - TEST(true); // GetKeyValuesReply.more in getRange + TEST(true); // 
GetKeyValuesMaybeHopReply.more in getRange if (reverse) end = firstGreaterOrEqual(output[output.size() - 1].key); else @@ -3597,8 +3637,9 @@ ACTOR Future getRange(Database cx, Reverse{ reverse ? (end - 1).isBackward() : begin.isBackward() }); if (e.code() == error_code_wrong_shard_server) { - RangeResult result = wait(getRangeFallback( - cx, version, originalBegin, originalEnd, originalLimits, reverse, info, tags)); + RangeResult result = + wait(getRangeFallback( + cx, version, originalBegin, originalEnd, hopInfo, originalLimits, reverse, info, tags)); getRangeFinished(cx, trLogInfo, startTime, @@ -4164,17 +4205,18 @@ Future getRange(Database const& cx, Reverse const& reverse, TransactionInfo const& info, TagSet const& tags) { - return getRange(cx, - Reference(), - fVersion, - begin, - end, - limits, - Promise>(), - Snapshot::True, - reverse, - info, - tags); + return getRange(cx, + Reference(), + fVersion, + begin, + end, + ""_sr, + limits, + Promise>(), + Snapshot::True, + reverse, + info, + tags); } bool DatabaseContext::debugUseTags = false; @@ -4469,13 +4511,26 @@ Future Transaction::getKey(const KeySelector& key, Snapshot snapshot) { return getKeyAndConflictRange(cx, key, getReadVersion(), conflictRange, info, options.readTags); } -Future Transaction::getRange(const KeySelector& begin, - const KeySelector& end, - GetRangeLimits limits, - Snapshot snapshot, - Reverse reverse) { +template +void increaseCounterForRequest(Database cx) { + if constexpr (std::is_same::value) { + ++cx->transactionGetRangeRequests; + } else if (std::is_same::value) { + ++cx->transactionGetRangeAndHopRequests; + } else { + UNREACHABLE(); + } +} + +template +Future Transaction::getRangeMaybeHop(const KeySelector& begin, + const KeySelector& end, + const Key& hopInfo, + GetRangeLimits limits, + Snapshot snapshot, + Reverse reverse) { ++cx->transactionLogicalReads; - ++cx->transactionGetRangeRequests; + increaseCounterForRequest(cx); if (limits.isReached()) return RangeResult(); @@ 
-4507,8 +4562,37 @@ Future Transaction::getRange(const KeySelector& begin, extraConflictRanges.push_back(conflictRange.getFuture()); } - return ::getRange( - cx, trLogInfo, getReadVersion(), b, e, limits, conflictRange, snapshot, reverse, info, options.readTags); + return ::getRange(cx, + trLogInfo, + getReadVersion(), + b, + e, + hopInfo, + limits, + conflictRange, + snapshot, + reverse, + info, + options.readTags); +} + +Future Transaction::getRange(const KeySelector& begin, + const KeySelector& end, + GetRangeLimits limits, + Snapshot snapshot, + Reverse reverse) { + return getRangeMaybeHop(begin, end, ""_sr, limits, snapshot, reverse); +} + +Future Transaction::getRangeAndHop(const KeySelector& begin, + const KeySelector& end, + const Key& hopInfo, + GetRangeLimits limits, + Snapshot snapshot, + Reverse reverse) { + + return getRangeMaybeHop( + begin, end, hopInfo, limits, snapshot, reverse); } Future Transaction::getRange(const KeySelector& begin, diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index 6bd5ab892e0..fddbbcdf40f 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -289,6 +289,23 @@ class Transaction : NonCopyable { reverse); } + [[nodiscard]] Future getRangeAndHop(const KeySelector& begin, + const KeySelector& end, + const Key& hopInfo, + GetRangeLimits limits, + Snapshot = Snapshot::False, + Reverse = Reverse::False); + +private: + template + Future getRangeMaybeHop(const KeySelector& begin, + const KeySelector& end, + const Key& hopInfo, + GetRangeLimits limits, + Snapshot snapshot, + Reverse reverse); + +public: // A method for streaming data from the storage server that is more efficient than getRange when reading large // amounts of data [[nodiscard]] Future getRangeStream(const PromiseStream>& results, diff --git a/fdbclient/PaxosConfigTransaction.h b/fdbclient/PaxosConfigTransaction.h index 4dfceb7a28e..509e829b452 100644 --- a/fdbclient/PaxosConfigTransaction.h +++ 
b/fdbclient/PaxosConfigTransaction.h @@ -50,6 +50,14 @@ class PaxosConfigTransaction final : public IConfigTransaction, public FastAlloc GetRangeLimits limits, Snapshot = Snapshot::False, Reverse = Reverse::False) override; + Future getRangeAndHop(KeySelector begin, + KeySelector end, + Key hopInfo, + GetRangeLimits limits, + Snapshot = Snapshot::False, + Reverse = Reverse::False) override { + throw client_invalid_operation(); + } void set(KeyRef const& key, ValueRef const& value) override; void clear(KeyRangeRef const&) override { throw client_invalid_operation(); } void clear(KeyRef const&) override; diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index f156a36c85d..cd2501d0ad1 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -74,6 +74,16 @@ class RYWImpl { using Result = RangeResult; }; + template + struct GetRangeAndHopReq { + GetRangeAndHopReq(KeySelector begin, KeySelector end, Key hopInfo, GetRangeLimits limits) + : begin(begin), end(end), hopInfo(hopInfo), limits(limits) {} + KeySelector begin, end; + Key hopInfo; + GetRangeLimits limits; + using Result = RangeResult; + }; + // read() Performs a read (get, getKey, getRange, etc), in the context of the given transaction. Snapshot or RYW // reads are distingushed by the type Iter being SnapshotCache::iterator or RYWIterator. Fills in the snapshot cache // as a side effect but does not affect conflict ranges. Some (indicated) overloads of read are required to update @@ -203,6 +213,36 @@ class RYWImpl { return v; } + ACTOR template + static Future readThroughAndHop(ReadYourWritesTransaction* ryw, + GetRangeAndHopReq read, + Snapshot snapshot) { + if (backwards && read.end.offset > 1) { + // FIXME: Optimistically assume that this will not run into the system keys, and only reissue if the result + // actually does. 
+ Key key = wait(ryw->tr.getKey(read.end, snapshot)); + if (key > ryw->getMaxReadKey()) + read.end = firstGreaterOrEqual(ryw->getMaxReadKey()); + else + read.end = KeySelector(firstGreaterOrEqual(key), key.arena()); + } + + RangeResult v = wait(ryw->tr.getRangeAndHop( + read.begin, read.end, read.hopInfo, read.limits, snapshot, backwards ? Reverse::True : Reverse::False)); + KeyRef maxKey = ryw->getMaxReadKey(); + if (v.size() > 0) { + if (!backwards && v[v.size() - 1].key >= maxKey) { + state RangeResult _v = v; + int i = _v.size() - 2; + for (; i >= 0 && _v[i].key >= maxKey; --i) { + } + return RangeResult(RangeResultRef(VectorRef(&_v[0], i + 1), false), _v.arena()); + } + } + + return v; + } + // addConflictRange(ryw,read,result) is called after a serializable read and is responsible for adding the relevant // conflict range @@ -309,6 +349,15 @@ class RYWImpl { } } ACTOR template + static Future readWithConflictRangeThroughAndHop(ReadYourWritesTransaction* ryw, + Req req, + Snapshot snapshot) { + choose { + when(typename Req::Result result = wait(readThroughAndHop(ryw, req, snapshot))) { return result; } + when(wait(ryw->resetPromise.getFuture())) { throw internal_error(); } + } + } + ACTOR template static Future readWithConflictRangeSnapshot(ReadYourWritesTransaction* ryw, Req req) { state SnapshotCache::iterator it(&ryw->cache, &ryw->writes); choose { @@ -344,6 +393,20 @@ class RYWImpl { return readWithConflictRangeRYW(ryw, req, snapshot); } + template + static inline Future readWithConflictRangeAndHop(ReadYourWritesTransaction* ryw, + Req const& req, + Snapshot snapshot) { + if (ryw->options.readYourWritesDisabled) { + return readWithConflictRangeThroughAndHop(ryw, req, snapshot); + } else if (snapshot && ryw->options.snapshotRywEnabled <= 0) { + TEST(true); // readWithConflictRangeSnapshot not supported for hop + throw client_invalid_operation(); + } + TEST(true); // readWithConflictRangeRYW not supported for hop + throw client_invalid_operation(); + } + 
template static void resolveKeySelectorFromCache(KeySelector& key, Iter& it, @@ -1509,6 +1572,65 @@ Future ReadYourWritesTransaction::getRange(const KeySelector& begin return getRange(begin, end, GetRangeLimits(limit), snapshot, reverse); } +Future ReadYourWritesTransaction::getRangeAndHop(KeySelector begin, + KeySelector end, + Key hopInfo, + GetRangeLimits limits, + Snapshot snapshot, + Reverse reverse) { + if (getDatabase()->apiVersionAtLeast(630)) { + if (specialKeys.contains(begin.getKey()) && specialKeys.begin <= end.getKey() && + end.getKey() <= specialKeys.end) { + TEST(true); // Special key space get range (Hop) + throw client_invalid_operation(); // Not support special keys. + } + } else { + if (begin.getKey() == LiteralStringRef("\xff\xff/worker_interfaces")) { + throw client_invalid_operation(); // Not support special keys. + } + } + + if (checkUsedDuringCommit()) { + return used_during_commit(); + } + + if (resetPromise.isSet()) + return resetPromise.getFuture().getError(); + + KeyRef maxKey = getMaxReadKey(); + if (begin.getKey() > maxKey || end.getKey() > maxKey) + return key_outside_legal_range(); + + // This optimization prevents nullptr operations from being added to the conflict range + if (limits.isReached()) { + TEST(true); // RYW range read limit 0 (Hop) + return RangeResult(); + } + + if (!limits.isValid()) + return range_limits_invalid(); + + if (begin.orEqual) + begin.removeOrEqual(begin.arena()); + + if (end.orEqual) + end.removeOrEqual(end.arena()); + + if (begin.offset >= end.offset && begin.getKey() >= end.getKey()) { + TEST(true); // RYW range inverted (Hop) + return RangeResult(); + } + + Future result = + reverse ? 
RYWImpl::readWithConflictRangeAndHop( + this, RYWImpl::GetRangeAndHopReq(begin, end, hopInfo, limits), snapshot) + : RYWImpl::readWithConflictRangeAndHop( + this, RYWImpl::GetRangeAndHopReq(begin, end, hopInfo, limits), snapshot); + + reading.add(success(result)); + return result; +} + Future>> ReadYourWritesTransaction::getAddressesForKey(const Key& key) { if (checkUsedDuringCommit()) { return used_during_commit(); diff --git a/fdbclient/ReadYourWrites.h b/fdbclient/ReadYourWrites.h index 3ac84a76583..d301b094ec1 100644 --- a/fdbclient/ReadYourWrites.h +++ b/fdbclient/ReadYourWrites.h @@ -104,6 +104,12 @@ class ReadYourWritesTransaction final : NonCopyable, snapshot, reverse); } + Future getRangeAndHop(KeySelector begin, + KeySelector end, + Key hopInfo, + GetRangeLimits limits, + Snapshot = Snapshot::False, + Reverse = Reverse::False) override; [[nodiscard]] Future>> getAddressesForKey(const Key& key) override; Future>> getRangeSplitPoints(const KeyRange& range, int64_t chunkSize) override; diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 44f0ec6e2ce..6ee837b8a18 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -644,6 +644,8 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( MAX_STORAGE_COMMIT_TIME, 120.0 ); //The max fsync stall time on the storage server and tlog before marking a disk as failed init( RANGESTREAM_LIMIT_BYTES, 2e6 ); if( randomize && BUGGIFY ) RANGESTREAM_LIMIT_BYTES = 1; init( ENABLE_CLEAR_RANGE_EAGER_READS, true ); + init( QUICK_GET_VALUE_FALLBACK, true ); + init( QUICK_GET_KEY_VALUES_FALLBACK, true ); //Wait Failure init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2; diff --git a/fdbclient/ServerKnobs.h b/fdbclient/ServerKnobs.h index 6a350652045..b8357535c8e 100644 --- a/fdbclient/ServerKnobs.h +++ b/fdbclient/ServerKnobs.h @@ -585,6 +585,8 @@ class ServerKnobs : public KnobsImpl { 
double MAX_STORAGE_COMMIT_TIME; int64_t RANGESTREAM_LIMIT_BYTES; bool ENABLE_CLEAR_RANGE_EAGER_READS; + bool QUICK_GET_VALUE_FALLBACK; + bool QUICK_GET_KEY_VALUES_FALLBACK; // Wait Failure int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS; diff --git a/fdbclient/SimpleConfigTransaction.h b/fdbclient/SimpleConfigTransaction.h index 36423b1515f..ddd39e9e8c6 100644 --- a/fdbclient/SimpleConfigTransaction.h +++ b/fdbclient/SimpleConfigTransaction.h @@ -59,6 +59,14 @@ class SimpleConfigTransaction final : public IConfigTransaction, public FastAllo GetRangeLimits limits, Snapshot = Snapshot::False, Reverse = Reverse::False) override; + Future getRangeAndHop(KeySelector begin, + KeySelector end, + Key hopInfo, + GetRangeLimits limits, + Snapshot = Snapshot::False, + Reverse = Reverse::False) override { + throw client_invalid_operation(); + } Future commit() override; Version getCommittedVersion() const override; void setOption(FDBTransactionOptions::Option option, Optional value = Optional()) override; diff --git a/fdbclient/StorageServerInterface.cpp b/fdbclient/StorageServerInterface.cpp index e2d403fc2f9..8f2b8637cbf 100644 --- a/fdbclient/StorageServerInterface.cpp +++ b/fdbclient/StorageServerInterface.cpp @@ -152,6 +152,45 @@ void TSS_traceMismatch(TraceEvent& event, .detail("TSSReply", tssResultsString); } +// range reads and hop +template <> +bool TSS_doCompare(const GetKeyValuesAndHopReply& src, const GetKeyValuesAndHopReply& tss) { + return src.more == tss.more && src.data == tss.data; +} + +template <> +const char* TSS_mismatchTraceName(const GetKeyValuesAndHopRequest& req) { + return "TSSMismatchGetKeyValuesAndHop"; +} + +template <> +void TSS_traceMismatch(TraceEvent& event, + const GetKeyValuesAndHopRequest& req, + const GetKeyValuesAndHopReply& src, + const GetKeyValuesAndHopReply& tss) { + std::string ssResultsString = format("(%d)%s:\n", src.data.size(), src.more ? 
"+" : ""); + for (auto& it : src.data) { + ssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value); + } + + std::string tssResultsString = format("(%d)%s:\n", tss.data.size(), tss.more ? "+" : ""); + for (auto& it : tss.data) { + tssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value); + } + event + .detail( + "Begin", + format("%s%s:%d", req.begin.orEqual ? "=" : "", req.begin.getKey().printable().c_str(), req.begin.offset)) + .detail("End", + format("%s%s:%d", req.end.orEqual ? "=" : "", req.end.getKey().printable().c_str(), req.end.offset)) + .detail("Version", req.version) + .detail("Limit", req.limit) + .detail("LimitBytes", req.limitBytes) + .setMaxFieldLength(FLOW_KNOBS->TSS_LARGE_TRACE_SIZE * 4 / 10) + .detail("SSReply", ssResultsString) + .detail("TSSReply", tssResultsString); +} + // streaming range reads template <> bool TSS_doCompare(const GetKeyValuesStreamReply& src, const GetKeyValuesStreamReply& tss) { @@ -356,6 +395,12 @@ void TSSMetrics::recordLatency(const GetKeyValuesRequest& req, double ssLatency, TSSgetKeyValuesLatency.addSample(tssLatency); } +template <> +void TSSMetrics::recordLatency(const GetKeyValuesAndHopRequest& req, double ssLatency, double tssLatency) { + SSgetKeyValuesAndHopLatency.addSample(ssLatency); + TSSgetKeyValuesAndHopLatency.addSample(tssLatency); +} + template <> void TSSMetrics::recordLatency(const WatchValueRequest& req, double ssLatency, double tssLatency) {} diff --git a/fdbclient/StorageServerInterface.h b/fdbclient/StorageServerInterface.h index 1671abe67d4..4e2af9c482f 100644 --- a/fdbclient/StorageServerInterface.h +++ b/fdbclient/StorageServerInterface.h @@ -22,6 +22,7 @@ #define FDBCLIENT_STORAGESERVERINTERFACE_H #pragma once +#include #include "fdbclient/FDBTypes.h" #include "fdbrpc/Locality.h" #include "fdbrpc/QueueModel.h" @@ -65,6 +66,7 @@ struct StorageServerInterface { // Throws a wrong_shard_server if the keys in the request or result depend on data 
outside this server OR if a large // selector offset prevents all data from being read in one range read RequestStream getKeyValues; + RequestStream getKeyValuesAndHop; RequestStream getShardState; RequestStream waitMetrics; @@ -129,6 +131,8 @@ struct StorageServerInterface { RequestStream(getValue.getEndpoint().getAdjustedEndpoint(15)); changeFeedPop = RequestStream(getValue.getEndpoint().getAdjustedEndpoint(16)); + getKeyValuesAndHop = + RequestStream(getValue.getEndpoint().getAdjustedEndpoint(17)); } } else { ASSERT(Ar::isDeserializing); @@ -174,6 +178,7 @@ struct StorageServerInterface { streams.push_back(changeFeedStream.getReceiver()); streams.push_back(overlappingChangeFeeds.getReceiver()); streams.push_back(changeFeedPop.getReceiver()); + streams.push_back(getKeyValuesAndHop.getReceiver(TaskPriority::LoadBalancedEndpoint)); FlowTransport::transport().addEndpoints(streams); } }; @@ -296,6 +301,9 @@ struct GetKeyValuesRequest : TimedRequest { SpanID spanContext; Arena arena; KeySelectorRef begin, end; + // This is a dummy field there has never been used. + // TODO: Get rid of this by constexpr or other template magic in getRange + KeyRef hopInfo = KeyRef(); Version version; // or latestVersion int limit, limitBytes; bool isFetchKeys; @@ -310,6 +318,44 @@ struct GetKeyValuesRequest : TimedRequest { } }; +struct GetKeyValuesAndHopReply : public LoadBalancedReply { + constexpr static FileIdentifier file_identifier = 1783067; + Arena arena; + // The key is the key in the requested range rather than the hop key. 
+ VectorRef data; + Version version; // useful when latestVersion was requested + bool more; + bool cached = false; + + GetKeyValuesAndHopReply() : version(invalidVersion), more(false), cached(false) {} + + template + void serialize(Ar& ar) { + serializer(ar, LoadBalancedReply::penalty, LoadBalancedReply::error, data, version, more, cached, arena); + } +}; + +struct GetKeyValuesAndHopRequest : TimedRequest { + constexpr static FileIdentifier file_identifier = 6795747; + SpanID spanContext; + Arena arena; + KeySelectorRef begin, end; + KeyRef hopInfo; + Version version; // or latestVersion + int limit, limitBytes; + bool isFetchKeys; + Optional tags; + Optional debugID; + ReplyPromise reply; + + GetKeyValuesAndHopRequest() : isFetchKeys(false) {} + template + void serialize(Ar& ar) { + serializer( + ar, begin, end, hopInfo, version, limit, limitBytes, isFetchKeys, tags, debugID, reply, spanContext, arena); + } +}; + struct GetKeyValuesStreamReply : public ReplyPromiseStreamReply { constexpr static FileIdentifier file_identifier = 1783066; Arena arena; diff --git a/fdbclient/ThreadSafeTransaction.cpp b/fdbclient/ThreadSafeTransaction.cpp index 3810d081910..f03b10d8c2f 100644 --- a/fdbclient/ThreadSafeTransaction.cpp +++ b/fdbclient/ThreadSafeTransaction.cpp @@ -257,6 +257,23 @@ ThreadFuture ThreadSafeTransaction::getRange(const KeySelectorRef& }); } +ThreadFuture ThreadSafeTransaction::getRangeAndHop(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& hopInfo, + GetRangeLimits limits, + bool snapshot, + bool reverse) { + KeySelector b = begin; + KeySelector e = end; + Key h = hopInfo; + + ISingleThreadTransaction* tr = this->tr; + return onMainThread([tr, b, e, h, limits, snapshot, reverse]() -> Future { + tr->checkDeferredError(); + return tr->getRangeAndHop(b, e, h, limits, Snapshot{ snapshot }, Reverse{ reverse }); + }); +} + ThreadFuture>> ThreadSafeTransaction::getAddressesForKey(const KeyRef& key) { Key k = key; diff --git 
a/fdbclient/ThreadSafeTransaction.h b/fdbclient/ThreadSafeTransaction.h index 75faa677458..b8ff9b7bfb3 100644 --- a/fdbclient/ThreadSafeTransaction.h +++ b/fdbclient/ThreadSafeTransaction.h @@ -106,6 +106,12 @@ class ThreadSafeTransaction : public ITransaction, ThreadSafeReferenceCounted getRangeAndHop(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& hopInfo, + GetRangeLimits limits, + bool snapshot, + bool reverse) override; ThreadFuture>> getAddressesForKey(const KeyRef& key) override; ThreadFuture> getVersionstamp() override; ThreadFuture getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override; diff --git a/fdbrpc/TSSComparison.h b/fdbrpc/TSSComparison.h index af5080af6ff..f230f30fd86 100644 --- a/fdbrpc/TSSComparison.h +++ b/fdbrpc/TSSComparison.h @@ -51,10 +51,12 @@ struct TSSMetrics : ReferenceCounted, NonCopyable { ContinuousSample SSgetValueLatency; ContinuousSample SSgetKeyLatency; ContinuousSample SSgetKeyValuesLatency; + ContinuousSample SSgetKeyValuesAndHopLatency; ContinuousSample TSSgetValueLatency; ContinuousSample TSSgetKeyLatency; ContinuousSample TSSgetKeyValuesLatency; + ContinuousSample TSSgetKeyValuesAndHopLatency; std::unordered_map ssErrorsByCode; std::unordered_map tssErrorsByCode; @@ -103,7 +105,8 @@ struct TSSMetrics : ReferenceCounted, NonCopyable { : cc("TSSClientMetrics"), requests("Requests", cc), streamComparisons("StreamComparisons", cc), ssErrors("SSErrors", cc), tssErrors("TSSErrors", cc), tssTimeouts("TSSTimeouts", cc), mismatches("Mismatches", cc), SSgetValueLatency(1000), SSgetKeyLatency(1000), SSgetKeyValuesLatency(1000), - TSSgetValueLatency(1000), TSSgetKeyLatency(1000), TSSgetKeyValuesLatency(1000) {} + SSgetKeyValuesAndHopLatency(1000), TSSgetValueLatency(1000), TSSgetKeyLatency(1000), + TSSgetKeyValuesLatency(1000), TSSgetKeyValuesAndHopLatency(1000) {} }; template diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index ca7d7d6db5b..29f01a15953 100644 --- 
a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -208,6 +208,7 @@ set(FDBSERVER_SRCS workloads/MemoryLifetime.actor.cpp workloads/MetricLogging.actor.cpp workloads/MutationLogReaderCorrectness.actor.cpp + workloads/IndexPrefetchDemo.actor.cpp workloads/ParallelRestore.actor.cpp workloads/Performance.actor.cpp workloads/Ping.actor.cpp diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index c7bb89afc6f..cdec6c3375e 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -42,6 +42,7 @@ #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/Notified.h" #include "fdbclient/StatusClient.h" +#include "fdbclient/Tuple.h" #include "fdbclient/SystemData.h" #include "fdbclient/TransactionLineage.h" #include "fdbclient/VersionedMap.h" @@ -779,8 +780,9 @@ struct StorageServer { struct Counters { CounterCollection cc; - Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, getRangeStreamQueries, finishedQueries, - lowPriorityQueries, rowsQueried, bytesQueried, watchQueries, emptyQueries; + Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, getRangeAndHopQueries, + getRangeStreamQueries, finishedQueries, lowPriorityQueries, rowsQueried, bytesQueried, watchQueries, + emptyQueries; // Bytes of the mutations that have been added to the memory of the storage server. When the data is durable // and cleared from the memory, we do not subtract it but add it to bytesDurable. @@ -807,6 +809,9 @@ struct StorageServer { Counter wrongShardServer; Counter fetchedVersions; Counter fetchesFromLogs; + // The following counters measure how many of "hop"s in the getRangeAndHopQueries are effective. "Miss" means + // fallback if fallback is enabled, otherwise means failure (so that another layer could implement fallback). 
+ Counter quickGetValueHit, quickGetValueMiss, quickGetKeyValuesHit, quickGetKeyValuesMiss; LatencySample readLatencySample; LatencyBands readLatencyBands; @@ -814,22 +819,25 @@ struct StorageServer { Counters(StorageServer* self) : cc("StorageServer", self->thisServerID.toString()), allQueries("QueryQueue", cc), getKeyQueries("GetKeyQueries", cc), getValueQueries("GetValueQueries", cc), - getRangeQueries("GetRangeQueries", cc), getRangeStreamQueries("GetRangeStreamQueries", cc), - finishedQueries("FinishedQueries", cc), lowPriorityQueries("LowPriorityQueries", cc), - rowsQueried("RowsQueried", cc), bytesQueried("BytesQueried", cc), watchQueries("WatchQueries", cc), - emptyQueries("EmptyQueries", cc), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), - bytesFetched("BytesFetched", cc), mutationBytes("MutationBytes", cc), - sampledBytesCleared("SampledBytesCleared", cc), kvFetched("KVFetched", cc), mutations("Mutations", cc), - setMutations("SetMutations", cc), clearRangeMutations("ClearRangeMutations", cc), - atomicMutations("AtomicMutations", cc), updateBatches("UpdateBatches", cc), - updateVersions("UpdateVersions", cc), loops("Loops", cc), fetchWaitingMS("FetchWaitingMS", cc), - fetchWaitingCount("FetchWaitingCount", cc), fetchExecutingMS("FetchExecutingMS", cc), - fetchExecutingCount("FetchExecutingCount", cc), readsRejected("ReadsRejected", cc), - wrongShardServer("WrongShardServer", cc), fetchedVersions("FetchedVersions", cc), - fetchesFromLogs("FetchesFromLogs", cc), readLatencySample("ReadLatencyMetrics", - self->thisServerID, - SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + getRangeQueries("GetRangeQueries", cc), getRangeAndHopQueries("GetRangeAndHopQueries", cc), + getRangeStreamQueries("GetRangeStreamQueries", cc), finishedQueries("FinishedQueries", cc), + lowPriorityQueries("LowPriorityQueries", cc), rowsQueried("RowsQueried", cc), + bytesQueried("BytesQueried", cc), watchQueries("WatchQueries", 
cc), emptyQueries("EmptyQueries", cc), + bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), bytesFetched("BytesFetched", cc), + mutationBytes("MutationBytes", cc), sampledBytesCleared("SampledBytesCleared", cc), + kvFetched("KVFetched", cc), mutations("Mutations", cc), setMutations("SetMutations", cc), + clearRangeMutations("ClearRangeMutations", cc), atomicMutations("AtomicMutations", cc), + updateBatches("UpdateBatches", cc), updateVersions("UpdateVersions", cc), loops("Loops", cc), + fetchWaitingMS("FetchWaitingMS", cc), fetchWaitingCount("FetchWaitingCount", cc), + fetchExecutingMS("FetchExecutingMS", cc), fetchExecutingCount("FetchExecutingCount", cc), + readsRejected("ReadsRejected", cc), wrongShardServer("WrongShardServer", cc), + fetchedVersions("FetchedVersions", cc), fetchesFromLogs("FetchesFromLogs", cc), + quickGetValueHit("QuickGetValueHit", cc), quickGetValueMiss("QuickGetValueMiss", cc), + quickGetKeyValuesHit("QuickGetKeyValuesHit", cc), quickGetKeyValuesMiss("QuickGetKeyValuesMiss", cc), + readLatencySample("ReadLatencyMetrics", + self->thisServerID, + SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, + SERVER_KNOBS->LATENCY_SAMPLE_SIZE), readLatencyBands("ReadLatencyBands", self->thisServerID, SERVER_KNOBS->STORAGE_LOGGING_DELAY) { specialCounter(cc, "LastTLogVersion", [self]() { return self->lastTLogVersion; }); specialCounter(cc, "Version", [self]() { return self->version.get(); }); @@ -1985,6 +1993,37 @@ void merge(Arena& arena, } } +ACTOR Future> quickGetValue(StorageServer* data, StringRef key, Version version) { + if (data->shards[key]->isReadable()) { + try { + // TODO: Use a lower level API may be better? Or tweak priorities? + GetValueRequest req(Span().context, key, version, Optional(), Optional()); + data->actors.add(data->readGuard(req, getValueQ)); + GetValueReply reply = wait(req.reply.getFuture()); + ++data->counters.quickGetValueHit; + return reply.value; + } catch (Error& e) { + // Fallback. 
+ } + } else { + // Fallback. + } + + ++data->counters.quickGetValueMiss; + if (SERVER_KNOBS->QUICK_GET_VALUE_FALLBACK) { + state Transaction tr(data->cx); + tr.setVersion(version); + // TODO: is DefaultPromiseEndpoint the best priority for this? + tr.info.taskID = TaskPriority::DefaultPromiseEndpoint; + Future> valueFuture = tr.get(key, Snapshot::True); + // TODO: async in case it needs to read from other servers. + state Optional valueOption = wait(valueFuture); + return valueOption; + } else { + throw hop_quick_get_value_miss(); + } +}; + // If limit>=0, it returns the first rows in the range (sorted ascending), otherwise the last rows (sorted descending). // readRange has O(|result|) + O(log |data|) cost ACTOR Future readRange(StorageServer* data, @@ -2470,6 +2509,440 @@ ACTOR Future getKeyValuesQ(StorageServer* data, GetKeyValuesRequest req) return Void(); } +ACTOR Future quickGetKeyValues(StorageServer* data, StringRef prefix, Version version) { + try { + // TODO: Use a lower level API may be better? Or tweak priorities? + GetKeyValuesRequest req; + req.spanContext = Span().context; + req.arena = Arena(); + req.begin = firstGreaterOrEqual(KeyRef(req.arena, prefix)); + req.end = firstGreaterOrEqual(strinc(prefix, req.arena)); + req.version = version; + + data->actors.add(data->readGuard(req, getKeyValuesQ)); + GetKeyValuesReply reply = wait(req.reply.getFuture()); + ++data->counters.quickGetKeyValuesHit; + + // Convert GetKeyValuesReply to RangeResult. + return RangeResult(RangeResultRef(reply.data, reply.more), reply.arena); + } catch (Error& e) { + // Fallback. + } + + ++data->counters.quickGetKeyValuesMiss; + if (SERVER_KNOBS->QUICK_GET_KEY_VALUES_FALLBACK) { + state Transaction tr(data->cx); + tr.setVersion(version); + // TODO: is DefaultPromiseEndpoint the best priority for this? 
+ tr.info.taskID = TaskPriority::DefaultPromiseEndpoint; + Future rangeResultFuture = tr.getRange(prefixRange(prefix), Snapshot::True); + // TODO: async in case it needs to read from other servers. + RangeResult rangeResult = wait(rangeResultFuture); + return rangeResult; + } else { + throw hop_quick_get_key_values_miss(); + } +}; + +Key constructHopKey(KeyValueRef* keyValue, Tuple& hopKeyFormatTuple, bool& isRangeQuery) { + // Lazily parse key and/or value to tuple because they may not need to be a tuple if not used. + Optional keyTuple; + Optional valueTuple; + + Tuple hopKeyTuple; + for (int i = 0; i < hopKeyFormatTuple.size(); i++) { + Tuple::ElementType type = hopKeyFormatTuple.getType(i); + if (type == Tuple::BYTES || type == Tuple::UTF8) { + std::string s = hopKeyFormatTuple.getString(i).toString(); + auto sz = s.size(); + + // Handle escape. + bool escaped = false; + size_t p = 0; + while (true) { + size_t found = s.find("{{", p); + if (found == std::string::npos) { + break; + } + s.replace(found, 2, "{"); + p += 1; + escaped = true; + } + p = 0; + while (true) { + size_t found = s.find("}}", p); + if (found == std::string::npos) { + break; + } + s.replace(found, 2, "}"); + p += 1; + escaped = true; + } + if (escaped) { + // If the element uses escape, cope the escaped version. + hopKeyTuple.append(s); + } + // {K[??]} or {V[??]} + else if (sz > 5 && s[0] == '{' && (s[1] == 'K' || s[1] == 'V') && s[2] == '[' && s[sz - 2] == ']' && + s[sz - 1] == '}') { + int idx; + try { + idx = std::stoi(s.substr(3, sz - 5)); + } catch (std::exception& e) { + throw hop_bad_index(); + } + Tuple* referenceTuple; + if (s[1] == 'K') { + // Use keyTuple as reference. + if (!keyTuple.present()) { + // May throw exception if the key is not parsable as a tuple. + keyTuple = Tuple::unpack(keyValue->key); + } + referenceTuple = &keyTuple.get(); + } else if (s[1] == 'V') { + // Use valueTuple as reference. 
+ if (!valueTuple.present()) { + // May throw exception if the value is not parsable as a tuple. + valueTuple = Tuple::unpack(keyValue->value); + } + referenceTuple = &valueTuple.get(); + } else { + ASSERT(false); + throw internal_error(); + } + + if (idx < 0 || idx >= referenceTuple->size()) { + throw hop_bad_index(); + } + hopKeyTuple.append(referenceTuple->subTuple(idx, idx + 1)); + } else if (s == "{...}") { + // Range query. + if (i != hopKeyFormatTuple.size() - 1) { + // It must be the last element of the hop info tuple + throw hop_bad_range_decriptor(); + } + // Every record will try to set it. It's ugly, but not wrong. + isRangeQuery = true; + // Do not add it to the hop key. + } else { + // If the element is a string but neither escaped nor descriptors, just copy it. + hopKeyTuple.append(hopKeyFormatTuple.subTuple(i, i + 1)); + } + } else { + // If the element not a string, just copy it. + hopKeyTuple.append(hopKeyFormatTuple.subTuple(i, i + 1)); + } + } + return hopKeyTuple.getDataAsStandalone(); +} + +TEST_CASE("/fdbserver/storageserver/constructHopKey") { + Key key = Tuple().append("key-0"_sr).append("key-1"_sr).append("key-2"_sr).getDataAsStandalone(); + Value value = Tuple().append("value-0"_sr).append("value-1"_sr).append("value-2"_sr).getDataAsStandalone(); + state KeyValueRef kvr(key, value); + { + Tuple hopInfoTuple = Tuple() + .append("normal"_sr) + .append("{{escaped}}"_sr) + .append("{K[2]}"_sr) + .append("{V[0]}"_sr) + .append("{...}"_sr); + + bool isRangeQuery = false; + Key hopKey = constructHopKey(&kvr, hopInfoTuple, isRangeQuery); + + Key expectedHopKey = Tuple() + .append("normal"_sr) + .append("{escaped}"_sr) + .append("key-2"_sr) + .append("value-0"_sr) + .getDataAsStandalone(); + // std::cout << printable(hopKey) << " == " << printable(expectedHopKey) << std::endl; + ASSERT(hopKey.compare(expectedHopKey) == 0); + ASSERT(isRangeQuery == true); + } + { + Tuple hopInfoTuple = Tuple().append("{{{{}}"_sr).append("}}"_sr); + + bool 
isRangeQuery = false; + Key hopKey = constructHopKey(&kvr, hopInfoTuple, isRangeQuery); + + Key expectedHopKey = Tuple().append("{{}"_sr).append("}"_sr).getDataAsStandalone(); + // std::cout << printable(hopKey) << " == " << printable(expectedHopKey) << std::endl; + ASSERT(hopKey.compare(expectedHopKey) == 0); + ASSERT(isRangeQuery == false); + } + { + Tuple hopInfoTuple = Tuple().append("{{{{}}"_sr).append("}}"_sr); + + bool isRangeQuery = false; + Key hopKey = constructHopKey(&kvr, hopInfoTuple, isRangeQuery); + + Key expectedHopKey = Tuple().append("{{}"_sr).append("}"_sr).getDataAsStandalone(); + // std::cout << printable(hopKey) << " == " << printable(expectedHopKey) << std::endl; + ASSERT(hopKey.compare(expectedHopKey) == 0); + ASSERT(isRangeQuery == false); + } + { + Tuple hopInfoTuple = Tuple().append("{K[100]}"_sr); + bool isRangeQuery = false; + state bool throwException = false; + try { + Key hopKey = constructHopKey(&kvr, hopInfoTuple, isRangeQuery); + } catch (Error& e) { + ASSERT(e.code() == error_code_hop_bad_index); + throwException = true; + } + ASSERT(throwException); + } + { + Tuple hopInfoTuple = Tuple().append("{...}"_sr).append("last-element"_sr); + bool isRangeQuery = false; + state bool throwException2 = false; + try { + Key hopKey = constructHopKey(&kvr, hopInfoTuple, isRangeQuery); + } catch (Error& e) { + ASSERT(e.code() == error_code_hop_bad_range_decriptor); + throwException2 = true; + } + ASSERT(throwException2); + } + { + Tuple hopInfoTuple = Tuple().append("{K[not-a-number]}"_sr); + bool isRangeQuery = false; + state bool throwException3 = false; + try { + Key hopKey = constructHopKey(&kvr, hopInfoTuple, isRangeQuery); + } catch (Error& e) { + ASSERT(e.code() == error_code_hop_bad_index); + throwException3 = true; + } + ASSERT(throwException3); + } + return Void(); +} + +ACTOR Future hop(StorageServer* data, GetKeyValuesReply input, StringRef hopInfo) { + state GetKeyValuesAndHopReply result; + result.version = input.version; + 
result.more = input.more; + result.cached = input.cached; + result.arena.dependsOn(input.arena); + + result.data.reserve(result.arena, input.data.size()); + state bool isRangeQuery = false; + state Tuple hopKeyFormatTuple = Tuple::unpack(hopInfo); + state KeyValueRef* it = input.data.begin(); + for (; it != input.data.end(); it++) { + state StringRef key = it->key; + + state Key hopKey = constructHopKey(it, hopKeyFormatTuple, isRangeQuery); + // Make sure the hopKey is always available, so that it's good even we want to get key asynchronously. + result.arena.dependsOn(hopKey.arena()); + + if (isRangeQuery) { + // Use the hopKey as the prefix of the range query. + RangeResult rangeResult = wait(quickGetKeyValues(data, hopKey, input.version)); + + if (rangeResult.more) { + // Probably the fan out is too large. The user should use the old way to query. + throw hop_quick_get_key_values_has_more(); + } + result.arena.dependsOn(rangeResult.arena()); + for (int i = 0; i < rangeResult.size(); i++) { + result.data.emplace_back(result.arena, rangeResult[i].key, rangeResult[i].value); + } + } else { + Optional valueOption = wait(quickGetValue(data, hopKey, input.version)); + + if (valueOption.present()) { + Value value = valueOption.get(); + result.arena.dependsOn(value.arena()); + result.data.emplace_back(result.arena, hopKey, value); + } else { + // TODO: Shall we throw exception if the key doesn't exist or the range is empty? + // throw hop_no_such_key(); + } + } + } + return result; +} + +// Most of the actor is copied from getKeyValuesQ. I tried to use templates but things become nearly impossible after +// combining actor shenanigans with template shenanigans. 
+ACTOR Future getKeyValuesAndHopQ(StorageServer* data, GetKeyValuesAndHopRequest req) +// Throws a wrong_shard_server if the keys in the request or result depend on data outside this server OR if a large +// selector offset prevents all data from being read in one range read +{ + state Span span("SS:getKeyValuesAndHop"_loc, { req.spanContext }); + state int64_t resultSize = 0; + state IKeyValueStore::ReadType type = + req.isFetchKeys ? IKeyValueStore::ReadType::FETCH : IKeyValueStore::ReadType::NORMAL; + getCurrentLineage()->modify(&TransactionLineage::txID) = req.spanContext.first(); + + ++data->counters.getRangeAndHopQueries; + ++data->counters.allQueries; + ++data->readQueueSizeMetric; + data->maxQueryQueue = std::max( + data->maxQueryQueue, data->counters.allQueries.getValue() - data->counters.finishedQueries.getValue()); + + // Active load balancing runs at a very high priority (to obtain accurate queue lengths) + // so we need to downgrade here + if (SERVER_KNOBS->FETCH_KEYS_LOWER_PRIORITY && req.isFetchKeys) { + wait(delay(0, TaskPriority::FetchKeys)); + } else { + wait(data->getQueryDelay()); + } + + try { + if (req.debugID.present()) + g_traceBatch.addEvent( + "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndHop.Before"); + state Version version = wait(waitForVersion(data, req.version, span.context)); + + state uint64_t changeCounter = data->shardChangeCounter; + // try { + state KeyRange shard = getShardKeyRange(data, req.begin); + + if (req.debugID.present()) + g_traceBatch.addEvent( + "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndHop.AfterVersion"); + //.detail("ShardBegin", shard.begin).detail("ShardEnd", shard.end); + //} catch (Error& e) { TraceEvent("WrongShardServer", data->thisServerID).detail("Begin", + // req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("Shard", + //"None").detail("In", "getKeyValuesAndHop>getShardKeyRange"); throw e; } + + if 
(!selectorInRange(req.end, shard) && !(req.end.isFirstGreaterOrEqual() && req.end.getKey() == shard.end)) { + // TraceEvent("WrongShardServer1", data->thisServerID).detail("Begin", + // req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("ShardBegin", + // shard.begin).detail("ShardEnd", shard.end).detail("In", "getKeyValuesAndHop>checkShardExtents"); + throw wrong_shard_server(); + } + + state int offset1 = 0; + state int offset2; + state Future fBegin = req.begin.isFirstGreaterOrEqual() + ? Future(req.begin.getKey()) + : findKey(data, req.begin, version, shard, &offset1, span.context, type); + state Future fEnd = req.end.isFirstGreaterOrEqual() + ? Future(req.end.getKey()) + : findKey(data, req.end, version, shard, &offset2, span.context, type); + state Key begin = wait(fBegin); + state Key end = wait(fEnd); + + if (req.debugID.present()) + g_traceBatch.addEvent( + "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndHop.AfterKeys"); + //.detail("Off1",offset1).detail("Off2",offset2).detail("ReqBegin",req.begin.getKey()).detail("ReqEnd",req.end.getKey()); + + // Offsets of zero indicate begin/end keys in this shard, which obviously means we can answer the query + // An end offset of 1 is also OK because the end key is exclusive, so if the first key of the next shard is the + // end the last actual key returned must be from this shard. 
A begin offset of 1 is also OK because then either + // begin is past end or equal to end (so the result is definitely empty) + if ((offset1 && offset1 != 1) || (offset2 && offset2 != 1)) { + TEST(true); // wrong_shard_server due to offset in getKeyValuesWithHopQ + // We could detect when offset1 takes us off the beginning of the database or offset2 takes us off the end, + // and return a clipped range rather than an error (since that is what the NativeAPI.getRange will do anyway + // via its "slow path"), but we would have to add some flags to the response to encode whether we went off + // the beginning and the end, since it needs that information. + //TraceEvent("WrongShardServer2", data->thisServerID).detail("Begin", req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("ShardBegin", shard.begin).detail("ShardEnd", shard.end).detail("In", "getKeyValuesAndHop>checkOffsets").detail("BeginKey", begin).detail("EndKey", end).detail("BeginOffset", offset1).detail("EndOffset", offset2); + throw wrong_shard_server(); + } + + if (begin >= end) { + if (req.debugID.present()) + g_traceBatch.addEvent( + "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndHop.Send"); + //.detail("Begin",begin).detail("End",end); + + GetKeyValuesAndHopReply none; + none.version = version; + none.more = false; + none.penalty = data->getPenalty(); + + data->checkChangeCounter(changeCounter, + KeyRangeRef(std::min(req.begin.getKey(), req.end.getKey()), + std::max(req.begin.getKey(), req.end.getKey()))); + req.reply.send(none); + } else { + state int remainingLimitBytes = req.limitBytes; + + GetKeyValuesReply _r = wait( + readRange(data, version, KeyRangeRef(begin, end), req.limit, &remainingLimitBytes, span.context, type)); + + // Hop!!! 
+ state GetKeyValuesAndHopReply r = wait(hop(data, _r, req.hopInfo)); + + if (req.debugID.present()) + g_traceBatch.addEvent( + "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndHop.AfterReadRange"); + //.detail("Begin",begin).detail("End",end).detail("SizeOf",r.data.size()); + data->checkChangeCounter( + changeCounter, + KeyRangeRef(std::min(begin, std::min(req.begin.getKey(), req.end.getKey())), + std::max(end, std::max(req.begin.getKey(), req.end.getKey())))); + if (EXPENSIVE_VALIDATION) { + // TODO: Only hop keys are returned, which are not supposed to be in the range. + // for (int i = 0; i < r.data.size(); i++) + // ASSERT(r.data[i].key >= begin && r.data[i].key < end); + // TODO: GetKeyValuesWithHopRequest doesn't respect limit yet. + // ASSERT(r.data.size() <= std::abs(req.limit)); + } + + /*for( int i = 0; i < r.data.size(); i++ ) { + StorageMetrics m; + m.bytesPerKSecond = r.data[i].expectedSize(); + m.iosPerKSecond = 1; //FIXME: this should be 1/r.data.size(), but we cannot do that because it is an int + data->metrics.notify(r.data[i].key, m); + }*/ + + // For performance concerns, the cost of a range read is billed to the start key and end key of the range. 
+ int64_t totalByteSize = 0; + for (int i = 0; i < r.data.size(); i++) { + totalByteSize += r.data[i].expectedSize(); + } + if (totalByteSize > 0 && SERVER_KNOBS->READ_SAMPLING_ENABLED) { + int64_t bytesReadPerKSecond = std::max(totalByteSize, SERVER_KNOBS->EMPTY_READ_PENALTY) / 2; + data->metrics.notifyBytesReadPerKSecond(r.data[0].key, bytesReadPerKSecond); + data->metrics.notifyBytesReadPerKSecond(r.data[r.data.size() - 1].key, bytesReadPerKSecond); + } + + r.penalty = data->getPenalty(); + req.reply.send(r); + + resultSize = req.limitBytes - remainingLimitBytes; + data->counters.bytesQueried += resultSize; + data->counters.rowsQueried += r.data.size(); + if (r.data.size() == 0) { + ++data->counters.emptyQueries; + } + } + } catch (Error& e) { + if (!canReplyWith(e)) + throw; + data->sendErrorWithPenalty(req.reply, e, data->getPenalty()); + } + + data->transactionTagCounter.addRequest(req.tags, resultSize); + ++data->counters.finishedQueries; + --data->readQueueSizeMetric; + + double duration = g_network->timer() - req.requestTime(); + data->counters.readLatencySample.addMeasurement(duration); + if (data->latencyBandConfig.present()) { + int maxReadBytes = + data->latencyBandConfig.get().readConfig.maxReadBytes.orDefault(std::numeric_limits::max()); + int maxSelectorOffset = + data->latencyBandConfig.get().readConfig.maxKeySelectorOffset.orDefault(std::numeric_limits::max()); + data->counters.readLatencyBands.addMeasurement(duration, + resultSize > maxReadBytes || + abs(req.begin.offset) > maxSelectorOffset || + abs(req.end.offset) > maxSelectorOffset); + } + + return Void(); +} + ACTOR Future getKeyValuesStreamQ(StorageServer* data, GetKeyValuesStreamRequest req) // Throws a wrong_shard_server if the keys in the request or result depend on data outside this server OR if a large // selector offset prevents all data from being read in one range read @@ -5690,6 +6163,19 @@ ACTOR Future serveGetKeyValuesRequests(StorageServer* self, FutureStream 
serveGetKeyValuesAndHopRequests(StorageServer* self, + FutureStream getKeyValuesAndHop) { + // TODO: Is it fine to keep TransactionLineage::Operation::GetKeyValues here? + getCurrentLineage()->modify(&TransactionLineage::operation) = TransactionLineage::Operation::GetKeyValues; + loop { + GetKeyValuesAndHopRequest req = waitNext(getKeyValuesAndHop); + + // Warning: This code is executed at extremely high priority (TaskPriority::LoadBalancedEndpoint), so downgrade + // before doing real work + self->actors.add(self->readGuard(req, getKeyValuesAndHopQ)); + } +} + ACTOR Future serveGetKeyValuesStreamRequests(StorageServer* self, FutureStream getKeyValuesStream) { loop { @@ -5889,6 +6375,7 @@ ACTOR Future storageServerCore(StorageServer* self, StorageServerInterface self->actors.add(checkBehind(self)); self->actors.add(serveGetValueRequests(self, ssi.getValue.getFuture())); self->actors.add(serveGetKeyValuesRequests(self, ssi.getKeyValues.getFuture())); + self->actors.add(serveGetKeyValuesAndHopRequests(self, ssi.getKeyValuesAndHop.getFuture())); self->actors.add(serveGetKeyValuesStreamRequests(self, ssi.getKeyValuesStream.getFuture())); self->actors.add(serveGetKeyRequests(self, ssi.getKey.getFuture())); self->actors.add(serveWatchValueRequests(self, ssi.watchValue.getFuture())); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index c42fcff0823..5c8981b34d3 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -1097,6 +1097,7 @@ ACTOR Future storageServerRollbackRebooter(std::set storageServerRollbackRebooter(std::set(), Reference(nullptr)); @@ -1478,6 +1480,7 @@ ACTOR Future workerServer(Reference connRecord, DUMPTOKEN(recruited.getKeyValueStoreType); DUMPTOKEN(recruited.watchValue); DUMPTOKEN(recruited.getKeyValuesStream); + DUMPTOKEN(recruited.getKeyValuesAndHop); Promise recovery; Future f = storageServer(kv, recruited, dbInfo, folder, recovery, connRecord); @@ -1574,6 +1577,7 @@ ACTOR Future workerServer(Reference 
connRecord, DUMPTOKEN(recruited.getValue); DUMPTOKEN(recruited.getKey); DUMPTOKEN(recruited.getKeyValues); + DUMPTOKEN(recruited.getKeyValuesAndHop); DUMPTOKEN(recruited.getShardState); DUMPTOKEN(recruited.waitMetrics); DUMPTOKEN(recruited.splitMetrics); @@ -1931,6 +1935,7 @@ ACTOR Future workerServer(Reference connRecord, DUMPTOKEN(recruited.getKeyValueStoreType); DUMPTOKEN(recruited.watchValue); DUMPTOKEN(recruited.getKeyValuesStream); + DUMPTOKEN(recruited.getKeyValuesAndHop); // printf("Recruited as storageServer\n"); std::string filename = diff --git a/fdbserver/workloads/IndexPrefetchDemo.actor.cpp b/fdbserver/workloads/IndexPrefetchDemo.actor.cpp new file mode 100644 index 00000000000..079cf416644 --- /dev/null +++ b/fdbserver/workloads/IndexPrefetchDemo.actor.cpp @@ -0,0 +1,144 @@ +/* + * IndexPrefetchDemo.actor.cpp + * + * This source file is part of the FoundationDB open source project + * + * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "fdbrpc/simulator.h" +#include "fdbclient/MutationLogReader.actor.h" +#include "fdbclient/Tuple.h" +#include "fdbserver/workloads/workloads.actor.h" +#include "flow/Error.h" +#include "flow/IRandom.h" +#include "flow/flow.h" +#include "flow/actorcompiler.h" // This must be the last #include. 
+ +const KeyRef prefix = "prefix"_sr; +const KeyRef RECORD = "RECORD"_sr; +const KeyRef INDEX = "INDEX"_sr; + +struct IndexPrefetchDemoWorkload : TestWorkload { + bool enabled; + + IndexPrefetchDemoWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { + enabled = !clientId; // only do this on the "first" client + } + + std::string description() const override { return "IndexPrefetchDemo"; } + + Future start(Database const& cx) override { + if (enabled) { + return _start(cx, this); + } + return Void(); + } + + static KeyRef primaryKey(int i) { return KeyRef("primary-key-of-record-" + std::to_string(i)); } + static KeyRef indexKey(int i) { return KeyRef("index-key-of-record-" + std::to_string(i)); } + static KeyRef data(int i) { return KeyRef("data-of-record-" + std::to_string(i)); } + + ACTOR Future fillInRecords(Database cx, int n) { + std::cout << "start fillInRecords n=" << n << std::endl; + // TODO: When n is large, split into multiple transactions. + state Transaction tr(cx); + try { + tr.reset(); + for (int i = 0; i < n; i++) { + tr.set(Tuple().append(prefix).append(RECORD).append(primaryKey(i)).pack(), + Tuple().append(data(i)).pack()); + tr.set(Tuple().append(prefix).append(INDEX).append(indexKey(i)).append(primaryKey(i)).pack(), + Tuple().pack()); + } + wait(tr.commit()); + std::cout << "finished fillInRecords" << std::endl; + } catch (Error& e) { + std::cout << "failed fillInRecords" << std::endl; + wait(tr.onError(e)); + } + return Void(); + } + + static void showResult(const RangeResult& result) { + std::cout << "result size: " << result.size() << std::endl; + const KeyValueRef* it = result.begin(); + for (; it != result.end(); it++) { + std::cout << "key=" << it->key.printable() << ", value=" << it->value.printable() << std::endl; + } + } + + ACTOR Future scanRange(Database cx, KeyRangeRef range) { + std::cout << "start scanRange " << range.toString() << std::endl; + // TODO: When n is large, split into multiple transactions. 
+ state Transaction tr(cx); + try { + tr.reset(); + RangeResult result = wait(tr.getRange(range, CLIENT_KNOBS->TOO_MANY)); + showResult(result); + } catch (Error& e) { + wait(tr.onError(e)); + } + std::cout << "finished scanRange" << std::endl; + return Void(); + } + + ACTOR Future scanRangeAndHop(Database cx, KeyRange range, Key hopInfo) { + std::cout << "start scanRangeAndHop " << range.toString() << std::endl; + // TODO: When n is large, split into multiple transactions. + state Transaction tr(cx); + try { + tr.reset(); + RangeResult result = wait(tr.getRangeAndHop(KeySelector(firstGreaterOrEqual(range.begin), range.arena()), + KeySelector(firstGreaterOrEqual(range.end), range.arena()), + hopInfo, + GetRangeLimits(CLIENT_KNOBS->TOO_MANY))); + showResult(result); + // result size: 2 + // key=\x01prefix\x00\x01RECORD\x00\x01primary-key-of-record-2\x00, value=\x01data-of-record-2\x00 + // key=\x01prefix\x00\x01RECORD\x00\x01primary-key-of-record-3\x00, value=\x01data-of-record-3\x00 + } catch (Error& e) { + wait(tr.onError(e)); + } + std::cout << "finished scanRangeAndHop" << std::endl; + return Void(); + } + + ACTOR Future _start(Database cx, IndexPrefetchDemoWorkload* self) { + // TODO: Use toml to config + wait(self->fillInRecords(cx, 5)); + + wait(self->scanRange(cx, normalKeys)); + + Key someIndexesBegin = Tuple().append(prefix).append(INDEX).append(indexKey(2)).getDataAsStandalone(); + Key someIndexesEnd = Tuple().append(prefix).append(INDEX).append(indexKey(4)).getDataAsStandalone(); + state KeyRange someIndexes = KeyRangeRef(someIndexesBegin, someIndexesEnd); + wait(self->scanRange(cx, someIndexes)); + + Tuple hopInfoTuple; + hopInfoTuple << prefix << RECORD << "{K[3]}"_sr; + Key hopInfo = hopInfoTuple.getDataAsStandalone(); + wait(self->scanRangeAndHop(cx, someIndexes, hopInfo)); + return Void(); + } + + Future check(Database const& cx) override { return true; } + + void getMetrics(std::vector& m) override {} +}; + +WorkloadFactory 
IndexPrefetchDemoWorkloadFactory("IndexPrefetchDemo"); diff --git a/flow/Platform.h b/flow/Platform.h index e07bc1a3329..889d2a0b17d 100644 --- a/flow/Platform.h +++ b/flow/Platform.h @@ -613,6 +613,7 @@ inline static void flushOutputStreams() { #if defined(_MSC_VER) #define DLLEXPORT __declspec(dllexport) #elif defined(__GNUG__) +#undef DLLEXPORT #define DLLEXPORT __attribute__((visibility("default"))) #else #error Missing symbol export diff --git a/flow/error_definitions.h b/flow/error_definitions.h index e468e46801c..6e969d2a2ea 100755 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -159,6 +159,12 @@ ERROR( blocked_from_network_thread, 2026, "Detected a deadlock in a callback cal ERROR( invalid_config_db_range_read, 2027, "Invalid configuration database range read" ) ERROR( invalid_config_db_key, 2028, "Invalid configuration database key provided" ) ERROR( invalid_config_path, 2029, "Invalid configuration path" ) +ERROR( hop_bad_index, 2030, "The index in K[] or V[] is not a valid number or out of range" ) +ERROR( hop_no_such_key, 2031, "A hop key is not set in database" ) +ERROR( hop_bad_range_decriptor, 2032, "\"{...}\" must be the last element of the hop info tuple" ) +ERROR( hop_quick_get_key_values_has_more, 2033, "One of the secondary range queries is too large" ) +ERROR( hop_quick_get_value_miss, 2034, "Find a hop key that is not served in the same SS" ) +ERROR( hop_quick_get_key_values_miss, 2035, "Find a hop range that is not served in the same SS" ) ERROR( incompatible_protocol_version, 2100, "Incompatible protocol version" ) ERROR( transaction_too_large, 2101, "Transaction exceeds byte limit" ) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 22c77e091d9..e5f52a2de3d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -150,6 +150,7 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES fast/MemoryLifetime.toml) add_fdb_test(TEST_FILES fast/MoveKeysCycle.toml) add_fdb_test(TEST_FILES 
fast/MutationLogReaderCorrectness.toml) + add_fdb_test(TEST_FILES fast/IndexPrefetchDemo.toml) add_fdb_test(TEST_FILES fast/ProtocolVersion.toml) add_fdb_test(TEST_FILES fast/RandomSelector.toml) add_fdb_test(TEST_FILES fast/RandomUnitTests.toml) diff --git a/tests/fast/IndexPrefetchDemo.toml b/tests/fast/IndexPrefetchDemo.toml new file mode 100644 index 00000000000..dbdca31f8d5 --- /dev/null +++ b/tests/fast/IndexPrefetchDemo.toml @@ -0,0 +1,6 @@ +[[test]] +testTitle = 'IndexPrefetchDemo' +useDB = true + + [[test.workload]] + testName = 'IndexPrefetchDemo' From 6c98e358938c3953de25e012d4e4dfecc950941c Mon Sep 17 00:00:00 2001 From: Tao Lin Date: Wed, 3 Nov 2021 13:32:01 -0700 Subject: [PATCH 51/69] Rename Hop to FlatMap --- bindings/c/fdb_c.cpp | 74 +++---- bindings/c/foundationdb/fdb_c.h | 34 ++-- bindings/c/test/unit/fdb_api.cpp | 66 +++--- bindings/c/test/unit/fdb_api.hpp | 32 +-- bindings/c/test/unit/unit_tests.cpp | 78 ++++---- bindings/java/fdbJNI.cpp | 75 +++---- ... RangeAndFlatMapQueryIntegrationTest.java} | 31 +-- .../foundationdb/FakeFDBTransaction.java | 4 +- .../apple/foundationdb/FDBTransaction.java | 41 ++-- .../com/apple/foundationdb/RangeQuery.java | 16 +- .../apple/foundationdb/ReadTransaction.java | 6 +- bindings/java/src/tests.cmake | 2 +- .../release-notes/release-notes-700.rst | 6 +- fdbclient/DatabaseContext.h | 2 +- fdbclient/IClientApi.h | 12 +- fdbclient/ISingleThreadTransaction.h | 12 +- fdbclient/MultiVersionTransaction.actor.cpp | 62 +++--- fdbclient/MultiVersionTransaction.h | 58 +++--- fdbclient/NativeAPI.actor.cpp | 152 +++++++------- fdbclient/NativeAPI.actor.h | 18 +- fdbclient/PaxosConfigTransaction.h | 12 +- fdbclient/ReadYourWrites.actor.cpp | 64 +++--- fdbclient/ReadYourWrites.h | 12 +- fdbclient/SimpleConfigTransaction.h | 12 +- fdbclient/StorageServerInterface.cpp | 20 +- fdbclient/StorageServerInterface.h | 25 ++- fdbclient/ThreadSafeTransaction.cpp | 16 +- fdbclient/ThreadSafeTransaction.h | 12 +- fdbrpc/TSSComparison.h | 
8 +- fdbserver/storageserver.actor.cpp | 188 +++++++++--------- fdbserver/worker.actor.cpp | 10 +- .../workloads/IndexPrefetchDemo.actor.cpp | 23 ++- flow/error_definitions.h | 12 +- 33 files changed, 599 insertions(+), 596 deletions(-) rename bindings/java/src/integration/com/apple/foundationdb/{RangeAndHopQueryIntegrationTest.java => RangeAndFlatMapQueryIntegrationTest.java} (88%) diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index f7a00ad4906..e5545251f2f 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -523,32 +523,32 @@ FDBFuture* fdb_transaction_get_range_impl(FDBTransaction* tr, .extractPtr()); } -FDBFuture* fdb_transaction_get_range_and_hop_impl(FDBTransaction* tr, - uint8_t const* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - uint8_t const* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - uint8_t const* hop_info_name, - int hop_info_name_length, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse) { +FDBFuture* fdb_transaction_get_range_and_flat_map_impl(FDBTransaction* tr, + uint8_t const* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + uint8_t const* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + uint8_t const* mapper_name, + int mapper_name_length, + int limit, + int target_bytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse) { FDBFuture* r = validate_and_update_parameters(limit, target_bytes, mode, iteration, reverse); if (r != nullptr) return r; return ( FDBFuture*)(TXN(tr) - ->getRangeAndHop( + ->getRangeAndFlatMap( KeySelectorRef(KeyRef(begin_key_name, begin_key_name_length), begin_or_equal, begin_offset), KeySelectorRef(KeyRef(end_key_name, end_key_name_length), end_or_equal, end_offset), - StringRef(hop_info_name, hop_info_name_length), + 
StringRef(mapper_name, mapper_name_length), GetRangeLimits(limit, target_bytes), snapshot, reverse) @@ -556,23 +556,23 @@ FDBFuture* fdb_transaction_get_range_and_hop_impl(FDBTransaction* tr, } // TODO: Support FDB_API_ADDED in generate_asm.py and then this can be replaced with fdb_api_ptr_unimpl. -FDBFuture* fdb_transaction_get_range_and_hop_v699(FDBTransaction* tr, - uint8_t const* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - uint8_t const* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - uint8_t const* hop_info_name, - int hop_info_name_length, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse) { +FDBFuture* fdb_transaction_get_range_and_flat_map_v699(FDBTransaction* tr, + uint8_t const* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + uint8_t const* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + uint8_t const* mapper_name, + int mapper_name_length, + int limit, + int target_bytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse) { fprintf(stderr, "UNIMPLEMENTED FDB API FUNCTION\n"); abort(); } @@ -768,7 +768,7 @@ extern "C" DLLEXPORT fdb_error_t fdb_select_api_version_impl(int runtime_version // WARNING: use caution when implementing removed functions by calling public API functions. This can lead to // undesired behavior when using the multi-version API. Instead, it is better to have both the removed and public // functions call an internal implementation function. See fdb_create_database_impl for an example. 
- FDB_API_CHANGED(fdb_transaction_get_range_and_hop, 700); + FDB_API_CHANGED(fdb_transaction_get_range_and_flat_map, 700); FDB_API_REMOVED(fdb_future_get_version, 620); FDB_API_REMOVED(fdb_create_cluster, 610); FDB_API_REMOVED(fdb_cluster_create_database, 610); diff --git a/bindings/c/foundationdb/fdb_c.h b/bindings/c/foundationdb/fdb_c.h index 5329fbd7d04..d3c65537c52 100644 --- a/bindings/c/foundationdb/fdb_c.h +++ b/bindings/c/foundationdb/fdb_c.h @@ -244,23 +244,23 @@ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_range(FDBTransaction fdb_bool_t reverse); #endif -DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_range_and_hop(FDBTransaction* tr, - uint8_t const* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - uint8_t const* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - uint8_t const* hop_info_name, - int hop_info_name_length, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse); +DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_range_and_flat_map(FDBTransaction* tr, + uint8_t const* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + uint8_t const* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + uint8_t const* mapper_name, + int mapper_name_length, + int limit, + int target_bytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse); DLLEXPORT void fdb_transaction_set(FDBTransaction* tr, uint8_t const* key_name, diff --git a/bindings/c/test/unit/fdb_api.cpp b/bindings/c/test/unit/fdb_api.cpp index c628d618b14..f15db95c62c 100644 --- a/bindings/c/test/unit/fdb_api.cpp +++ b/bindings/c/test/unit/fdb_api.cpp @@ -193,39 +193,39 @@ KeyValueArrayFuture Transaction::get_range(const uint8_t* begin_key_name, reverse)); } -KeyValueArrayFuture 
Transaction::get_range_and_hop(const uint8_t* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - const uint8_t* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - const uint8_t* hop_info_name, - int hop_info_name_length, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse) { - return KeyValueArrayFuture(fdb_transaction_get_range_and_hop(tr_, - begin_key_name, - begin_key_name_length, - begin_or_equal, - begin_offset, - end_key_name, - end_key_name_length, - end_or_equal, - end_offset, - hop_info_name, - hop_info_name_length, - limit, - target_bytes, - mode, - iteration, - snapshot, - reverse)); +KeyValueArrayFuture Transaction::get_range_and_flat_map(const uint8_t* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + const uint8_t* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + const uint8_t* mapper_name, + int mapper_name_length, + int limit, + int target_bytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse) { + return KeyValueArrayFuture(fdb_transaction_get_range_and_flat_map(tr_, + begin_key_name, + begin_key_name_length, + begin_or_equal, + begin_offset, + end_key_name, + end_key_name_length, + end_or_equal, + end_offset, + mapper_name, + mapper_name_length, + limit, + target_bytes, + mode, + iteration, + snapshot, + reverse)); } EmptyFuture Transaction::watch(std::string_view key) { diff --git a/bindings/c/test/unit/fdb_api.hpp b/bindings/c/test/unit/fdb_api.hpp index c03b720b8dd..fb1304a26ee 100644 --- a/bindings/c/test/unit/fdb_api.hpp +++ b/bindings/c/test/unit/fdb_api.hpp @@ -220,22 +220,22 @@ class Transaction final { fdb_bool_t reverse); // Returns a future which will be set to an FDBKeyValue array. 
- KeyValueArrayFuture get_range_and_hop(const uint8_t* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - const uint8_t* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - const uint8_t* hop_info_name, - int hop_info_name_length, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse); + KeyValueArrayFuture get_range_and_flat_map(const uint8_t* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + const uint8_t* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + const uint8_t* mapper_name, + int mapper_name_length, + int limit, + int target_bytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse); // Wrapper around fdb_transaction_watch. Returns a future representing an // empty value. diff --git a/bindings/c/test/unit/unit_tests.cpp b/bindings/c/test/unit/unit_tests.cpp index 5dc477a0ed3..f59c7f953bc 100644 --- a/bindings/c/test/unit/unit_tests.cpp +++ b/bindings/c/test/unit/unit_tests.cpp @@ -225,39 +225,39 @@ GetRangeResult get_range(fdb::Transaction& tr, return GetRangeResult{ results, out_more != 0, 0 }; } -GetRangeResult get_range_and_hop(fdb::Transaction& tr, - const uint8_t* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - const uint8_t* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - const uint8_t* hop_info_name, - int hop_info_name_length, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse) { - fdb::KeyValueArrayFuture f1 = tr.get_range_and_hop(begin_key_name, - begin_key_name_length, - begin_or_equal, - begin_offset, - end_key_name, - end_key_name_length, - end_or_equal, - end_offset, - hop_info_name, - hop_info_name_length, - limit, - 
target_bytes, - mode, - iteration, - snapshot, - reverse); +GetRangeResult get_range_and_flat_map(fdb::Transaction& tr, + const uint8_t* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + const uint8_t* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + const uint8_t* mapper_name, + int mapper_name_length, + int limit, + int target_bytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse) { + fdb::KeyValueArrayFuture f1 = tr.get_range_and_flat_map(begin_key_name, + begin_key_name_length, + begin_or_equal, + begin_offset, + end_key_name, + end_key_name_length, + end_or_equal, + end_offset, + mapper_name, + mapper_name_length, + limit, + target_bytes, + mode, + iteration, + snapshot, + reverse); fdb_error_t err = wait_future(f1); if (err) { @@ -895,7 +895,7 @@ static std::string recordValue(const int i) { return Tuple().append(dataOfRecord(i)).pack().toString(); } -TEST_CASE("fdb_transaction_get_range_and_hop") { +TEST_CASE("fdb_transaction_get_range_and_flat_map") { // Note: The user requested `prefix` should be added as the first element of the tuple that forms the key, rather // than the prefix of the key. So we don't use key() or create_data() in this test. std::map data; @@ -905,19 +905,19 @@ TEST_CASE("fdb_transaction_get_range_and_hop") { } insert_data(db, data); - std::string hop_info = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).pack().toString(); + std::string mapper = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).pack().toString(); fdb::Transaction tr(db); - // get_range_and_hop is only support without RYW. This is a must!!! + // get_range_and_flat_map is only support without RYW. This is a must!!! 
fdb_check(tr.set_option(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, nullptr, 0)); while (1) { - auto result = get_range_and_hop( + auto result = get_range_and_flat_map( tr, // [0, 1] FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((const uint8_t*)indexEntryKey(0).c_str(), indexEntryKey(0).size()), FDB_KEYSEL_FIRST_GREATER_THAN((const uint8_t*)indexEntryKey(1).c_str(), indexEntryKey(1).size()), - (const uint8_t*)hop_info.c_str(), - hop_info.size(), + (const uint8_t*)mapper.c_str(), + mapper.size(), /* limit */ 0, /* target_bytes */ 0, /* FDBStreamingMode */ FDB_STREAMING_MODE_WANT_ALL, diff --git a/bindings/java/fdbJNI.cpp b/bindings/java/fdbJNI.cpp index 4caea9d89a7..2a4ba668beb 100644 --- a/bindings/java/fdbJNI.cpp +++ b/bindings/java/fdbJNI.cpp @@ -756,23 +756,24 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1 return (jlong)f; } -JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1getRangeAndHop(JNIEnv* jenv, - jobject, - jlong tPtr, - jbyteArray keyBeginBytes, - jboolean orEqualBegin, - jint offsetBegin, - jbyteArray keyEndBytes, - jboolean orEqualEnd, - jint offsetEnd, - jbyteArray hopInfoBytes, - jint rowLimit, - jint targetBytes, - jint streamingMode, - jint iteration, - jboolean snapshot, - jboolean reverse) { - if (!tPtr || !keyBeginBytes || !keyEndBytes || !hopInfoBytes) { +JNIEXPORT jlong JNICALL +Java_com_apple_foundationdb_FDBTransaction_Transaction_1getRangeAndFlatMap(JNIEnv* jenv, + jobject, + jlong tPtr, + jbyteArray keyBeginBytes, + jboolean orEqualBegin, + jint offsetBegin, + jbyteArray keyEndBytes, + jboolean orEqualEnd, + jint offsetEnd, + jbyteArray mapperBytes, + jint rowLimit, + jint targetBytes, + jint streamingMode, + jint iteration, + jboolean snapshot, + jboolean reverse) { + if (!tPtr || !keyBeginBytes || !keyEndBytes || !mapperBytes) { throwParamNotNull(jenv); return 0; } @@ -793,8 +794,8 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1 return 0; } - 
uint8_t* barrHopInfo = (uint8_t*)jenv->GetByteArrayElements(hopInfoBytes, JNI_NULL); - if (!barrHopInfo) { + uint8_t* barrMapper = (uint8_t*)jenv->GetByteArrayElements(mapperBytes, JNI_NULL); + if (!barrMapper) { jenv->ReleaseByteArrayElements(keyBeginBytes, (jbyte*)barrBegin, JNI_ABORT); jenv->ReleaseByteArrayElements(keyEndBytes, (jbyte*)barrEnd, JNI_ABORT); if (!jenv->ExceptionOccurred()) @@ -802,26 +803,26 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1 return 0; } - FDBFuture* f = fdb_transaction_get_range_and_hop(tr, - barrBegin, - jenv->GetArrayLength(keyBeginBytes), - orEqualBegin, - offsetBegin, - barrEnd, - jenv->GetArrayLength(keyEndBytes), - orEqualEnd, - offsetEnd, - barrHopInfo, - jenv->GetArrayLength(hopInfoBytes), - rowLimit, - targetBytes, - (FDBStreamingMode)streamingMode, - iteration, - snapshot, - reverse); + FDBFuture* f = fdb_transaction_get_range_and_flat_map(tr, + barrBegin, + jenv->GetArrayLength(keyBeginBytes), + orEqualBegin, + offsetBegin, + barrEnd, + jenv->GetArrayLength(keyEndBytes), + orEqualEnd, + offsetEnd, + barrMapper, + jenv->GetArrayLength(mapperBytes), + rowLimit, + targetBytes, + (FDBStreamingMode)streamingMode, + iteration, + snapshot, + reverse); jenv->ReleaseByteArrayElements(keyBeginBytes, (jbyte*)barrBegin, JNI_ABORT); jenv->ReleaseByteArrayElements(keyEndBytes, (jbyte*)barrEnd, JNI_ABORT); - jenv->ReleaseByteArrayElements(hopInfoBytes, (jbyte*)barrHopInfo, JNI_ABORT); + jenv->ReleaseByteArrayElements(mapperBytes, (jbyte*)barrMapper, JNI_ABORT); return (jlong)f; } diff --git a/bindings/java/src/integration/com/apple/foundationdb/RangeAndHopQueryIntegrationTest.java b/bindings/java/src/integration/com/apple/foundationdb/RangeAndFlatMapQueryIntegrationTest.java similarity index 88% rename from bindings/java/src/integration/com/apple/foundationdb/RangeAndHopQueryIntegrationTest.java rename to bindings/java/src/integration/com/apple/foundationdb/RangeAndFlatMapQueryIntegrationTest.java 
index 2741bcb368d..7f418b5b2d0 100644 --- a/bindings/java/src/integration/com/apple/foundationdb/RangeAndHopQueryIntegrationTest.java +++ b/bindings/java/src/integration/com/apple/foundationdb/RangeAndFlatMapQueryIntegrationTest.java @@ -1,5 +1,5 @@ /* - * RangeAndHopQueryIntegrationTest.java + * RangeAndFlatMapQueryIntegrationTest.java * * This source file is part of the FoundationDB open source project * @@ -40,7 +40,7 @@ import org.junit.jupiter.api.extension.ExtendWith; @ExtendWith(RequiresDatabase.class) -class RangeAndHopQueryIntegrationTest { +class RangeAndFlatMapQueryIntegrationTest { private static final FDB fdb = FDB.selectAPIVersion(710); public String databaseArg = null; private Database openFDB() { return fdb.open(databaseArg); } @@ -86,7 +86,7 @@ private static String getArgFromEnv() { return cluster; } public static void main(String[] args) throws Exception { - final RangeAndHopQueryIntegrationTest test = new RangeAndHopQueryIntegrationTest(); + final RangeAndFlatMapQueryIntegrationTest test = new RangeAndFlatMapQueryIntegrationTest(); test.databaseArg = getArgFromEnv(); test.clearDatabase(); test.comparePerformance(); @@ -103,7 +103,7 @@ void comparePerformance() { try (Database db = openFDB()) { insertRecordsWithIndexes(numRecords, db); instrument(rangeQueryAndGet, "rangeQueryAndGet", db); - instrument(rangeQueryAndHop, "rangeQueryAndHop", db); + instrument(rangeQueryAndFlatMap, "rangeQueryAndFlatMap", db); } } @@ -176,14 +176,15 @@ public interface RangeQueryWithIndex { return null; }); - RangeQueryWithIndex rangeQueryAndHop = (int begin, int end, Database db) -> db.run(tr -> { + RangeQueryWithIndex rangeQueryAndFlatMap = (int begin, int end, Database db) -> db.run(tr -> { try { tr.options().setReadYourWritesDisable(); - List kvs = tr.getRangeAndHop(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)), - KeySelector.firstGreaterOrEqual(indexEntryKey(end)), HOP_INFO, - ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL) - 
.asList() - .get(); + List kvs = + tr.getRangeAndFlatMap(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)), + KeySelector.firstGreaterOrEqual(indexEntryKey(end)), HOP_INFO, + ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL) + .asList() + .get(); Assertions.assertEquals(end - begin, kvs.size()); if (validate) { @@ -205,7 +206,7 @@ void assertByteArrayEquals(byte[] expected, byte[] actual) { } @Test - void rangeAndHopQueryOverMultipleRows() throws Exception { + void rangeAndFlatMapQueryOverMultipleRows() throws Exception { try (Database db = openFDB()) { insertRecordsWithIndexes(3, db); @@ -215,13 +216,13 @@ void rangeAndHopQueryOverMultipleRows() throws Exception { } db.run(tr -> { - // getRangeAndHop is only support without RYW. This is a must!!! + // getRangeAndFlatMap is only support without RYW. This is a must!!! tr.options().setReadYourWritesDisable(); Iterator kvs = - tr.getRangeAndHop(KeySelector.firstGreaterOrEqual(indexEntryKey(0)), - KeySelector.firstGreaterThan(indexEntryKey(1)), HOP_INFO, - ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL) + tr.getRangeAndFlatMap(KeySelector.firstGreaterOrEqual(indexEntryKey(0)), + KeySelector.firstGreaterThan(indexEntryKey(1)), HOP_INFO, + ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL) .iterator(); Iterator expected_data_of_records_iter = expected_data_of_records.iterator(); while (expected_data_of_records_iter.hasNext()) { diff --git a/bindings/java/src/junit/com/apple/foundationdb/FakeFDBTransaction.java b/bindings/java/src/junit/com/apple/foundationdb/FakeFDBTransaction.java index 1f5cbce2615..0c5a121c64a 100644 --- a/bindings/java/src/junit/com/apple/foundationdb/FakeFDBTransaction.java +++ b/bindings/java/src/junit/com/apple/foundationdb/FakeFDBTransaction.java @@ -89,8 +89,8 @@ public CompletableFuture get(byte[] key) { @Override protected FutureResults getRange_internal(KeySelector begin, KeySelector end, - // TODO: hop is not supported in 
FakeFDBTransaction yet. - byte[] hopInfo, // Nullable + // TODO: map is not supported in FakeFDBTransaction yet. + byte[] mapper, // Nullable int rowLimit, int targetBytes, int streamingMode, int iteration, boolean isSnapshot, boolean reverse) { numRangeCalls++; diff --git a/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java b/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java index 0503dbc5022..9bd99c892d6 100644 --- a/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java +++ b/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java @@ -92,12 +92,12 @@ public CompletableFuture getRangeSplitPoints(Range range, long c } @Override - public AsyncIterable getRangeAndHop(KeySelector begin, KeySelector end, byte[] hopInfo, int limit, - boolean reverse, StreamingMode mode) { - if (hopInfo == null) { - throw new IllegalArgumentException("HopInfo must be non-null"); + public AsyncIterable getRangeAndFlatMap(KeySelector begin, KeySelector end, byte[] mapper, int limit, + boolean reverse, StreamingMode mode) { + if (mapper == null) { + throw new IllegalArgumentException("Mapper must be non-null"); } - return new RangeQuery(FDBTransaction.this, true, begin, end, hopInfo, limit, reverse, mode, eventKeeper); + return new RangeQuery(FDBTransaction.this, true, begin, end, mapper, limit, reverse, mode, eventKeeper); } /////////////////// @@ -348,12 +348,12 @@ public CompletableFuture getRangeSplitPoints(Range range, long c } @Override - public AsyncIterable getRangeAndHop(KeySelector begin, KeySelector end, byte[] hopInfo, int limit, - boolean reverse, StreamingMode mode) { - if (hopInfo == null) { - throw new IllegalArgumentException("HopInfo must be non-null"); + public AsyncIterable getRangeAndFlatMap(KeySelector begin, KeySelector end, byte[] mapper, int limit, + boolean reverse, StreamingMode mode) { + if (mapper == null) { + throw new IllegalArgumentException("Mapper must be non-null"); } - return new RangeQuery(this, false, 
begin, end, hopInfo, limit, reverse, mode, eventKeeper); + return new RangeQuery(this, false, begin, end, mapper, limit, reverse, mode, eventKeeper); } /////////////////// @@ -434,7 +434,7 @@ public Database getDatabase() { // Users of this function must close the returned FutureResults when finished protected FutureResults getRange_internal(KeySelector begin, KeySelector end, - byte[] hopInfo, // Nullable + byte[] mapper, // Nullable int rowLimit, int targetBytes, int streamingMode, int iteration, boolean isSnapshot, boolean reverse) { if (eventKeeper != null) { @@ -447,13 +447,13 @@ protected FutureResults getRange_internal(KeySelector begin, KeySelector end, begin.toString(), end.toString(), rowLimit, targetBytes, streamingMode, iteration, Boolean.toString(isSnapshot), Boolean.toString(reverse)));*/ return new FutureResults( - hopInfo == null + mapper == null ? Transaction_getRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(), end.getKey(), end.orEqual(), end.getOffset(), rowLimit, targetBytes, streamingMode, iteration, isSnapshot, reverse) - : Transaction_getRangeAndHop(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(), - end.getKey(), end.orEqual(), end.getOffset(), hopInfo, rowLimit, - targetBytes, streamingMode, iteration, isSnapshot, reverse), + : Transaction_getRangeAndFlatMap(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(), + end.getKey(), end.orEqual(), end.getOffset(), mapper, rowLimit, + targetBytes, streamingMode, iteration, isSnapshot, reverse), FDB.instance().isDirectBufferQueriesEnabled(), executor, eventKeeper); } finally { pointerReadLock.unlock(); @@ -793,11 +793,12 @@ private native long Transaction_getRange(long cPtr, byte[] keyEnd, boolean orEqualEnd, int offsetEnd, int rowLimit, int targetBytes, int streamingMode, int iteration, boolean isSnapshot, boolean reverse); - private native long Transaction_getRangeAndHop(long cPtr, byte[] keyBegin, boolean orEqualBegin, int offsetBegin, - byte[] keyEnd, 
boolean orEqualEnd, int offsetEnd, - byte[] hopInfo, // Nonnull - int rowLimit, int targetBytes, int streamingMode, int iteration, - boolean isSnapshot, boolean reverse); + private native long Transaction_getRangeAndFlatMap(long cPtr, byte[] keyBegin, boolean orEqualBegin, + int offsetBegin, byte[] keyEnd, boolean orEqualEnd, + int offsetEnd, + byte[] mapper, // Nonnull + int rowLimit, int targetBytes, int streamingMode, int iteration, + boolean isSnapshot, boolean reverse); private native void Transaction_addConflictRange(long cPtr, byte[] keyBegin, byte[] keyEnd, int conflictRangeType); private native void Transaction_set(long cPtr, byte[] key, byte[] value); diff --git a/bindings/java/src/main/com/apple/foundationdb/RangeQuery.java b/bindings/java/src/main/com/apple/foundationdb/RangeQuery.java index 469f015d1c6..f91b00471ad 100644 --- a/bindings/java/src/main/com/apple/foundationdb/RangeQuery.java +++ b/bindings/java/src/main/com/apple/foundationdb/RangeQuery.java @@ -49,19 +49,19 @@ class RangeQuery implements AsyncIterable { private final FDBTransaction tr; private final KeySelector begin; private final KeySelector end; - private final byte[] hopInfo; // Nullable + private final byte[] mapper; // Nullable private final boolean snapshot; private final int rowLimit; private final boolean reverse; private final StreamingMode streamingMode; private final EventKeeper eventKeeper; - RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, byte[] hopInfo, + RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, byte[] mapper, int rowLimit, boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) { this.tr = transaction; this.begin = begin; this.end = end; - this.hopInfo = hopInfo; + this.mapper = mapper; this.snapshot = isSnapshot; this.rowLimit = rowLimit; this.reverse = reverse; @@ -69,7 +69,7 @@ class RangeQuery implements AsyncIterable { this.eventKeeper = eventKeeper; } 
- // RangeQueryAndHop + // RangeQueryAndFlatMap RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, int rowLimit, boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) { this(transaction, isSnapshot, begin, end, null, rowLimit, reverse, streamingMode, eventKeeper); @@ -92,7 +92,7 @@ public CompletableFuture> asList() { // if the streaming mode is EXACT, try and grab things as one chunk if(mode == StreamingMode.EXACT) { - FutureResults range = tr.getRange_internal(this.begin, this.end, this.hopInfo, this.rowLimit, 0, + FutureResults range = tr.getRange_internal(this.begin, this.end, this.mapper, this.rowLimit, 0, StreamingMode.EXACT.code(), 1, this.snapshot, this.reverse); return range.thenApply(result -> result.get().values) .whenComplete((result, e) -> range.close()); @@ -100,8 +100,8 @@ public CompletableFuture> asList() { // If the streaming mode is not EXACT, simply collect the results of an // iteration into a list - return AsyncUtil.collect( - new RangeQuery(tr, snapshot, begin, end, hopInfo, rowLimit, reverse, mode, eventKeeper), tr.getExecutor()); + return AsyncUtil.collect(new RangeQuery(tr, snapshot, begin, end, mapper, rowLimit, reverse, mode, eventKeeper), + tr.getExecutor()); } /** @@ -229,7 +229,7 @@ private synchronized void startNextFetch() { nextFuture = new CompletableFuture<>(); final long sTime = System.nanoTime(); - fetchingChunk = tr.getRange_internal(begin, end, hopInfo, rowsLimited ? rowsRemaining : 0, 0, + fetchingChunk = tr.getRange_internal(begin, end, mapper, rowsLimited ? 
rowsRemaining : 0, 0, streamingMode.code(), ++iteration, snapshot, reverse); BiConsumer cons = new FetchComplete(fetchingChunk,nextFuture); diff --git a/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java b/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java index ba4b674d5a5..699dfd3ec09 100644 --- a/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java +++ b/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java @@ -434,7 +434,7 @@ AsyncIterable getRange(Range range, * * @param begin the beginning of the range (inclusive) * @param end the end of the range (exclusive) - * @param hopInfo TODO + * @param mapper TODO * @param limit the maximum number of results to return. Limits results to the * first keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query * should not limit the number of results. If {@code reverse} is {@code true} rows @@ -458,8 +458,8 @@ AsyncIterable getRange(Range range, *

* @return a handle to access the results of the asynchronous call */ - AsyncIterable getRangeAndHop(KeySelector begin, KeySelector end, byte[] hopInfo, int limit, - boolean reverse, StreamingMode mode); + AsyncIterable getRangeAndFlatMap(KeySelector begin, KeySelector end, byte[] mapper, int limit, + boolean reverse, StreamingMode mode); /** * Gets an estimate for the number of bytes stored in the given range. diff --git a/bindings/java/src/tests.cmake b/bindings/java/src/tests.cmake index ae00c389943..b84c148ac25 100644 --- a/bindings/java/src/tests.cmake +++ b/bindings/java/src/tests.cmake @@ -52,7 +52,7 @@ set(JAVA_INTEGRATION_TESTS src/integration/com/apple/foundationdb/CycleMultiClientIntegrationTest.java src/integration/com/apple/foundationdb/SidebandMultiThreadClientTest.java src/integration/com/apple/foundationdb/RepeatableReadMultiThreadClientTest.java - src/integration/com/apple/foundationdb/RangeAndHopQueryIntegrationTest.java + src/integration/com/apple/foundationdb/RangeAndFlatMapQueryIntegrationTest.java ) # Resources that are used in integration testing, but are not explicitly test files (JUnit rules, diff --git a/documentation/sphinx/source/release-notes/release-notes-700.rst b/documentation/sphinx/source/release-notes/release-notes-700.rst index b7a569de3f8..a1abd083665 100644 --- a/documentation/sphinx/source/release-notes/release-notes-700.rst +++ b/documentation/sphinx/source/release-notes/release-notes-700.rst @@ -30,7 +30,7 @@ Features * Improved the efficiency with which storage servers replicate data between themselves. `(PR #5017) `_ * Added support to ``exclude command`` to exclude based on locality match. `(PR #5113) `_ * Add the ``trace_partial_file_suffix`` network option. This option will give unfinished trace files a special suffix to indicate they're not complete yet. When the trace file is complete, it is renamed to remove the suffix. `(PR #5328) `_ -* Added "get range and hop" feature with new APIs (see Bindings section). 
Storage servers are able to generate the keys in the queries based on another query. With this, upper layer can push some computations down to FDB, to improve latency and bandwidth when read. `(PR #5609) `_ +* Added "get range and flat map" feature with new APIs (see Bindings section). Storage servers are able to generate the keys in the queries based on another query. With this, upper layer can push some computations down to FDB, to improve latency and bandwidth when read. `(PR #5609) `_ Performance ----------- @@ -87,8 +87,8 @@ Bindings * C: Added a function, ``fdb_database_create_snapshot``, to create a snapshot of the database. `(PR #4241) `_ * C: Added ``fdb_database_get_main_thread_busyness`` function to report how busy a client's main thread is. `(PR #4504) `_ * Java: Added ``Database.getMainThreadBusyness`` function to report how busy a client's main thread is. `(PR #4564) `_ -* C: Added ``fdb_transaction_get_range_and_hop`` function to support running queries based on another query in one request. `(PR #5609) `_ -* Java: Added ``Transaction.getRangeAndHop`` function to support running queries based on another query in one request. `(PR #5609) `_ +* C: Added ``fdb_transaction_get_range_and_flat_map`` function to support running queries based on another query in one request. `(PR #5609) `_ +* Java: Added ``Transaction.getRangeAndFlatMap`` function to support running queries based on another query in one request. 
`(PR #5609) `_ Other Changes ------------- diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index 6e4a22247d0..4c038a6fc0b 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -369,7 +369,7 @@ class DatabaseContext : public ReferenceCounted, public FastAll Counter transactionGetKeyRequests; Counter transactionGetValueRequests; Counter transactionGetRangeRequests; - Counter transactionGetRangeAndHopRequests; + Counter transactionGetRangeAndFlatMapRequests; Counter transactionGetRangeStreamRequests; Counter transactionWatchRequests; Counter transactionGetAddressesForKeyRequests; diff --git a/fdbclient/IClientApi.h b/fdbclient/IClientApi.h index bda30afa708..5562ad3ba6e 100644 --- a/fdbclient/IClientApi.h +++ b/fdbclient/IClientApi.h @@ -59,12 +59,12 @@ class ITransaction { GetRangeLimits limits, bool snapshot = false, bool reverse = false) = 0; - virtual ThreadFuture getRangeAndHop(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& hopInfo, - GetRangeLimits limits, - bool snapshot = false, - bool reverse = false) = 0; + virtual ThreadFuture getRangeAndFlatMap(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& mapper, + GetRangeLimits limits, + bool snapshot = false, + bool reverse = false) = 0; virtual ThreadFuture>> getAddressesForKey(const KeyRef& key) = 0; virtual ThreadFuture> getVersionstamp() = 0; diff --git a/fdbclient/ISingleThreadTransaction.h b/fdbclient/ISingleThreadTransaction.h index edd16103d3c..62336a15d72 100644 --- a/fdbclient/ISingleThreadTransaction.h +++ b/fdbclient/ISingleThreadTransaction.h @@ -63,12 +63,12 @@ class ISingleThreadTransaction : public ReferenceCounted getRangeAndHop(KeySelector begin, - KeySelector end, - Key hopInfo, - GetRangeLimits limits, - Snapshot = Snapshot::False, - Reverse = Reverse::False) = 0; + virtual Future getRangeAndFlatMap(KeySelector begin, + KeySelector end, + Key mapper, + GetRangeLimits limits, + Snapshot = 
Snapshot::False, + Reverse = Reverse::False) = 0; virtual Future>> getAddressesForKey(Key const& key) = 0; virtual Future>> getRangeSplitPoints(KeyRange const& range, int64_t chunkSize) = 0; virtual Future getEstimatedRangeSizeBytes(KeyRange const& keys) = 0; diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp index f664cccd4d7..32603d1f53d 100644 --- a/fdbclient/MultiVersionTransaction.actor.cpp +++ b/fdbclient/MultiVersionTransaction.actor.cpp @@ -141,29 +141,29 @@ ThreadFuture DLTransaction::getRange(const KeyRangeRef& keys, return getRange(firstGreaterOrEqual(keys.begin), firstGreaterOrEqual(keys.end), limits, snapshot, reverse); } -ThreadFuture DLTransaction::getRangeAndHop(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& hopInfo, - GetRangeLimits limits, - bool snapshot, - bool reverse) { - FdbCApi::FDBFuture* f = api->transactionGetRangeAndHop(tr, - begin.getKey().begin(), - begin.getKey().size(), - begin.orEqual, - begin.offset, - end.getKey().begin(), - end.getKey().size(), - end.orEqual, - end.offset, - hopInfo.begin(), - hopInfo.size(), - limits.rows, - limits.bytes, - FDB_STREAMING_MODE_EXACT, - 0, - snapshot, - reverse); +ThreadFuture DLTransaction::getRangeAndFlatMap(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& mapper, + GetRangeLimits limits, + bool snapshot, + bool reverse) { + FdbCApi::FDBFuture* f = api->transactionGetRangeAndFlatMap(tr, + begin.getKey().begin(), + begin.getKey().size(), + begin.orEqual, + begin.offset, + end.getKey().begin(), + end.getKey().size(), + end.orEqual, + end.offset, + mapper.begin(), + mapper.size(), + limits.rows, + limits.bytes, + FDB_STREAMING_MODE_EXACT, + 0, + snapshot, + reverse); return toThreadFuture(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) { const FdbCApi::FDBKeyValue* kvs; int count; @@ -487,7 +487,7 @@ void DLApi::init() { loadClientFunction(&api->transactionGetKey, lib, fdbCPath, 
"fdb_transaction_get_key"); loadClientFunction(&api->transactionGetAddressesForKey, lib, fdbCPath, "fdb_transaction_get_addresses_for_key"); loadClientFunction(&api->transactionGetRange, lib, fdbCPath, "fdb_transaction_get_range"); - loadClientFunction(&api->transactionGetRangeAndHop, lib, fdbCPath, "fdb_transaction_get_range_and_hop"); + loadClientFunction(&api->transactionGetRangeAndFlatMap, lib, fdbCPath, "fdb_transaction_get_range_and_flat_map"); loadClientFunction( &api->transactionGetVersionstamp, lib, fdbCPath, "fdb_transaction_get_versionstamp", headerVersion >= 410); loadClientFunction(&api->transactionSet, lib, fdbCPath, "fdb_transaction_set"); @@ -767,14 +767,14 @@ ThreadFuture MultiVersionTransaction::getRange(const KeyRangeRef& k return abortableFuture(f, tr.onChange); } -ThreadFuture MultiVersionTransaction::getRangeAndHop(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& hopInfo, - GetRangeLimits limits, - bool snapshot, - bool reverse) { +ThreadFuture MultiVersionTransaction::getRangeAndFlatMap(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& mapper, + GetRangeLimits limits, + bool snapshot, + bool reverse) { auto tr = getTransaction(); - auto f = tr.transaction ? tr.transaction->getRangeAndHop(begin, end, hopInfo, limits, snapshot, reverse) + auto f = tr.transaction ? 
tr.transaction->getRangeAndFlatMap(begin, end, mapper, limits, snapshot, reverse) : makeTimeout(); return abortableFuture(f, tr.onChange); } diff --git a/fdbclient/MultiVersionTransaction.h b/fdbclient/MultiVersionTransaction.h index e772a7d50ed..50e21bca576 100644 --- a/fdbclient/MultiVersionTransaction.h +++ b/fdbclient/MultiVersionTransaction.h @@ -118,23 +118,23 @@ struct FdbCApi : public ThreadSafeReferenceCounted { int iteration, fdb_bool_t snapshot, fdb_bool_t reverse); - FDBFuture* (*transactionGetRangeAndHop)(FDBTransaction* tr, - uint8_t const* beginKeyName, - int beginKeyNameLength, - fdb_bool_t beginOrEqual, - int beginOffset, - uint8_t const* endKeyName, - int endKeyNameLength, - fdb_bool_t endOrEqual, - int endOffset, - uint8_t const* hop_info_name, - int hop_info_name_length, - int limit, - int targetBytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse); + FDBFuture* (*transactionGetRangeAndFlatMap)(FDBTransaction* tr, + uint8_t const* beginKeyName, + int beginKeyNameLength, + fdb_bool_t beginOrEqual, + int beginOffset, + uint8_t const* endKeyName, + int endKeyNameLength, + fdb_bool_t endOrEqual, + int endOffset, + uint8_t const* mapper_name, + int mapper_name_length, + int limit, + int targetBytes, + FDBStreamingMode mode, + int iteration, + fdb_bool_t snapshot, + fdb_bool_t reverse); FDBFuture* (*transactionGetVersionstamp)(FDBTransaction* tr); void (*transactionSet)(FDBTransaction* tr, @@ -236,12 +236,12 @@ class DLTransaction : public ITransaction, ThreadSafeReferenceCounted getRangeAndHop(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& hopInfo, - GetRangeLimits limits, - bool snapshot, - bool reverse) override; + ThreadFuture getRangeAndFlatMap(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& mapper, + GetRangeLimits limits, + bool snapshot, + bool reverse) override; ThreadFuture>> getAddressesForKey(const KeyRef& key) override; ThreadFuture> 
getVersionstamp() override; ThreadFuture getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override; @@ -383,12 +383,12 @@ class MultiVersionTransaction : public ITransaction, ThreadSafeReferenceCounted< GetRangeLimits limits, bool snapshot = false, bool reverse = false) override; - ThreadFuture getRangeAndHop(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& hopInfo, - GetRangeLimits limits, - bool snapshot, - bool reverse) override; + ThreadFuture getRangeAndFlatMap(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& mapper, + GetRangeLimits limits, + bool snapshot, + bool reverse) override; ThreadFuture>> getAddressesForKey(const KeyRef& key) override; ThreadFuture> getVersionstamp() override; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index c3f542b34ac..91c80a5ad9d 100644 --- a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -160,8 +160,8 @@ void DatabaseContext::addTssMapping(StorageServerInterface const& ssi, StorageSe TSSEndpointData(tssi.id(), tssi.getKey.getEndpoint(), metrics)); queueModel.updateTssEndpoint(ssi.getKeyValues.getEndpoint().token.first(), TSSEndpointData(tssi.id(), tssi.getKeyValues.getEndpoint(), metrics)); - queueModel.updateTssEndpoint(ssi.getKeyValuesAndHop.getEndpoint().token.first(), - TSSEndpointData(tssi.id(), tssi.getKeyValuesAndHop.getEndpoint(), metrics)); + queueModel.updateTssEndpoint(ssi.getKeyValuesAndFlatMap.getEndpoint().token.first(), + TSSEndpointData(tssi.id(), tssi.getKeyValuesAndFlatMap.getEndpoint(), metrics)); queueModel.updateTssEndpoint(ssi.getKeyValuesStream.getEndpoint().token.first(), TSSEndpointData(tssi.id(), tssi.getKeyValuesStream.getEndpoint(), metrics)); @@ -185,7 +185,7 @@ void DatabaseContext::removeTssMapping(StorageServerInterface const& ssi) { queueModel.removeTssEndpoint(ssi.getValue.getEndpoint().token.first()); queueModel.removeTssEndpoint(ssi.getKey.getEndpoint().token.first()); 
queueModel.removeTssEndpoint(ssi.getKeyValues.getEndpoint().token.first()); - queueModel.removeTssEndpoint(ssi.getKeyValuesAndHop.getEndpoint().token.first()); + queueModel.removeTssEndpoint(ssi.getKeyValuesAndFlatMap.getEndpoint().token.first()); queueModel.removeTssEndpoint(ssi.getKeyValuesStream.getEndpoint().token.first()); queueModel.removeTssEndpoint(ssi.watchValue.getEndpoint().token.first()); @@ -1199,7 +1199,7 @@ DatabaseContext::DatabaseContext(Reference watchValueMap(Future version, return Void(); } -template -void transformRangeLimits(GetRangeLimits limits, Reverse reverse, GetKeyValuesMaybeHopRequest& req) { +template +void transformRangeLimits(GetRangeLimits limits, Reverse reverse, GetKeyValuesFamilyRequest& req) { if (limits.bytes != 0) { if (!limits.hasRowLimit()) req.limit = CLIENT_KNOBS->REPLY_BYTE_LIMIT; // Can't get more than this many rows anyway @@ -3055,22 +3055,22 @@ void transformRangeLimits(GetRangeLimits limits, Reverse reverse, GetKeyValuesMa } } -template -RequestStream StorageServerInterface::*getRangeRequestStream() { - if constexpr (std::is_same::value) { +template +RequestStream StorageServerInterface::*getRangeRequestStream() { + if constexpr (std::is_same::value) { return &StorageServerInterface::getKeyValues; - } else if (std::is_same::value) { - return &StorageServerInterface::getKeyValuesAndHop; + } else if (std::is_same::value) { + return &StorageServerInterface::getKeyValuesAndFlatMap; } else { UNREACHABLE(); } } -ACTOR template +ACTOR template Future getExactRange(Database cx, Version version, KeyRange keys, - Key hopInfo, + Key mapper, GetRangeLimits limits, Reverse reverse, TransactionInfo info, @@ -3085,16 +3085,16 @@ Future getExactRange(Database cx, keys, CLIENT_KNOBS->GET_RANGE_SHARD_LIMIT, reverse, - getRangeRequestStream(), + getRangeRequestStream(), info)); ASSERT(locations.size()); state int shard = 0; loop { const KeyRangeRef& range = locations[shard].first; - GetKeyValuesMaybeHopRequest req; - req.hopInfo = 
hopInfo; - req.arena.dependsOn(hopInfo.arena()); + GetKeyValuesFamilyRequest req; + req.mapper = mapper; + req.arena.dependsOn(mapper.arena()); req.version = version; req.begin = firstGreaterOrEqual(range.begin); @@ -3125,14 +3125,14 @@ Future getExactRange(Database cx, .detail("Servers", locations[shard].second->description());*/ } ++cx->transactionPhysicalReads; - state GetKeyValuesMaybeHopReply rep; + state GetKeyValuesFamilyReply rep; try { choose { when(wait(cx->connectionFileChanged())) { throw transaction_too_old(); } - when(GetKeyValuesMaybeHopReply _rep = + when(GetKeyValuesFamilyReply _rep = wait(loadBalance(cx.getPtr(), locations[shard].second, - getRangeRequestStream(), + getRangeRequestStream(), req, TaskPriority::DefaultPromiseEndpoint, AtMostOnce::False, @@ -3182,7 +3182,7 @@ Future getExactRange(Database cx, .detail("BlockBytes", rep.data.expectedSize()); ASSERT(false); } - TEST(true); // GetKeyValuesMaybeHopReply.more in getExactRange + TEST(true); // GetKeyValuesFamilyReply.more in getExactRange // Make next request to the same shard with a beginning key just after the last key returned if (reverse) locations[shard].first = @@ -3258,12 +3258,12 @@ Future resolveKey(Database const& cx, return getKey(cx, key, version, info, tags); } -ACTOR template +ACTOR template Future getRangeFallback(Database cx, Version version, KeySelector begin, KeySelector end, - Key hopInfo, + Key mapper, GetRangeLimits limits, Reverse reverse, TransactionInfo info, @@ -3290,8 +3290,8 @@ Future getRangeFallback(Database cx, // if b is allKeys.begin, we have either read through the beginning of the database, // or allKeys.begin exists in the database and will be part of the conflict range anyways - RangeResult _r = wait(getExactRange( - cx, version, KeyRangeRef(b, e), hopInfo, limits, reverse, info, tags)); + RangeResult _r = wait(getExactRange( + cx, version, KeyRangeRef(b, e), mapper, limits, reverse, info, tags)); RangeResult r = _r; if (b == allKeys.begin && ((reverse && 
!r.more) || !reverse)) @@ -3316,7 +3316,7 @@ Future getRangeFallback(Database cx, return r; } -// TODO: Client should add hop keys to conflict ranges. +// TODO: Client should add mapped keys to conflict ranges. void getRangeFinished(Database cx, Reference trLogInfo, double startTime, @@ -3371,17 +3371,17 @@ void getRangeFinished(Database cx, } } -// GetKeyValuesMaybeHopRequest: GetKeyValuesRequest or GetKeyValuesAndHopRequest -// GetKeyValuesMaybeHopReply: GetKeyValuesReply or GetKeyValuesAndHopReply -// Sadly we need GetKeyValuesMaybeHopReply because cannot do something like: state -// REPLY_TYPE(GetKeyValuesMaybeHopRequest) rep; -ACTOR template +// GetKeyValuesFamilyRequest: GetKeyValuesRequest or GetKeyValuesAndFlatMapRequest +// GetKeyValuesFamilyReply: GetKeyValuesReply or GetKeyValuesAndFlatMapReply +// Sadly we need GetKeyValuesFamilyReply because cannot do something like: state +// REPLY_TYPE(GetKeyValuesFamilyRequest) rep; +ACTOR template Future getRange(Database cx, Reference trLogInfo, Future fVersion, KeySelector begin, KeySelector end, - Key hopInfo, + Key mapper, GetRangeLimits limits, Promise> conflictRange, Snapshot snapshot, @@ -3422,12 +3422,12 @@ Future getRange(Database cx, Key locationKey = reverse ? Key(end.getKey(), end.arena()) : Key(begin.getKey(), begin.arena()); Reverse locationBackward{ reverse ? 
(end - 1).isBackward() : begin.isBackward() }; state std::pair> beginServer = wait(getKeyLocation( - cx, locationKey, getRangeRequestStream(), info, locationBackward)); + cx, locationKey, getRangeRequestStream(), info, locationBackward)); state KeyRange shard = beginServer.first; state bool modifiedSelectors = false; - state GetKeyValuesMaybeHopRequest req; - req.hopInfo = hopInfo; - req.arena.dependsOn(hopInfo.arena()); + state GetKeyValuesFamilyRequest req; + req.mapper = mapper; + req.arena.dependsOn(mapper.arena()); req.isFetchKeys = (info.taskID == TaskPriority::FetchKeys); req.version = readVersion; @@ -3486,17 +3486,17 @@ Future getRange(Database cx, } ++cx->transactionPhysicalReads; - state GetKeyValuesMaybeHopReply rep; + state GetKeyValuesFamilyReply rep; try { if (CLIENT_BUGGIFY_WITH_PROB(.01)) { throw deterministicRandom()->randomChoice( std::vector{ transaction_too_old(), future_version() }); } // state AnnotateActor annotation(currentLineage); - GetKeyValuesMaybeHopReply _rep = + GetKeyValuesFamilyReply _rep = wait(loadBalance(cx.getPtr(), beginServer.second, - getRangeRequestStream(), + getRangeRequestStream(), req, TaskPriority::DefaultPromiseEndpoint, AtMostOnce::False, @@ -3596,12 +3596,11 @@ Future getRange(Database cx, if (!rep.more) { ASSERT(modifiedSelectors); - TEST(true); // !GetKeyValuesMaybeHopReply.more and modifiedSelectors in getRange + TEST(true); // !GetKeyValuesFamilyReply.more and modifiedSelectors in getRange if (!rep.data.size()) { - RangeResult result = - wait(getRangeFallback( - cx, version, originalBegin, originalEnd, hopInfo, originalLimits, reverse, info, tags)); + RangeResult result = wait(getRangeFallback( + cx, version, originalBegin, originalEnd, mapper, originalLimits, reverse, info, tags)); getRangeFinished(cx, trLogInfo, startTime, @@ -3619,7 +3618,7 @@ Future getRange(Database cx, else begin = firstGreaterOrEqual(shard.end); } else { - TEST(true); // GetKeyValuesMaybeHopReply.more in getRange + TEST(true); // 
GetKeyValuesFamilyReply.more in getRange if (reverse) end = firstGreaterOrEqual(output[output.size() - 1].key); else @@ -3637,9 +3636,8 @@ Future getRange(Database cx, Reverse{ reverse ? (end - 1).isBackward() : begin.isBackward() }); if (e.code() == error_code_wrong_shard_server) { - RangeResult result = - wait(getRangeFallback( - cx, version, originalBegin, originalEnd, hopInfo, originalLimits, reverse, info, tags)); + RangeResult result = wait(getRangeFallback( + cx, version, originalBegin, originalEnd, mapper, originalLimits, reverse, info, tags)); getRangeFinished(cx, trLogInfo, startTime, @@ -4511,26 +4509,26 @@ Future Transaction::getKey(const KeySelector& key, Snapshot snapshot) { return getKeyAndConflictRange(cx, key, getReadVersion(), conflictRange, info, options.readTags); } -template +template void increaseCounterForRequest(Database cx) { - if constexpr (std::is_same::value) { + if constexpr (std::is_same::value) { ++cx->transactionGetRangeRequests; - } else if (std::is_same::value) { - ++cx->transactionGetRangeAndHopRequests; + } else if (std::is_same::value) { + ++cx->transactionGetRangeAndFlatMapRequests; } else { UNREACHABLE(); } } -template -Future Transaction::getRangeMaybeHop(const KeySelector& begin, +template +Future Transaction::getRangeInternal(const KeySelector& begin, const KeySelector& end, - const Key& hopInfo, + const Key& mapper, GetRangeLimits limits, Snapshot snapshot, Reverse reverse) { ++cx->transactionLogicalReads; - increaseCounterForRequest(cx); + increaseCounterForRequest(cx); if (limits.isReached()) return RangeResult(); @@ -4562,18 +4560,18 @@ Future Transaction::getRangeMaybeHop(const KeySelector& begin, extraConflictRanges.push_back(conflictRange.getFuture()); } - return ::getRange(cx, - trLogInfo, - getReadVersion(), - b, - e, - hopInfo, - limits, - conflictRange, - snapshot, - reverse, - info, - options.readTags); + return ::getRange(cx, + trLogInfo, + getReadVersion(), + b, + e, + mapper, + limits, + conflictRange, + 
snapshot, + reverse, + info, + options.readTags); } Future Transaction::getRange(const KeySelector& begin, @@ -4581,18 +4579,18 @@ Future Transaction::getRange(const KeySelector& begin, GetRangeLimits limits, Snapshot snapshot, Reverse reverse) { - return getRangeMaybeHop(begin, end, ""_sr, limits, snapshot, reverse); + return getRangeInternal(begin, end, ""_sr, limits, snapshot, reverse); } -Future Transaction::getRangeAndHop(const KeySelector& begin, - const KeySelector& end, - const Key& hopInfo, - GetRangeLimits limits, - Snapshot snapshot, - Reverse reverse) { +Future Transaction::getRangeAndFlatMap(const KeySelector& begin, + const KeySelector& end, + const Key& mapper, + GetRangeLimits limits, + Snapshot snapshot, + Reverse reverse) { - return getRangeMaybeHop( - begin, end, hopInfo, limits, snapshot, reverse); + return getRangeInternal( + begin, end, mapper, limits, snapshot, reverse); } Future Transaction::getRange(const KeySelector& begin, diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index fddbbcdf40f..af5b2b74199 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -289,18 +289,18 @@ class Transaction : NonCopyable { reverse); } - [[nodiscard]] Future getRangeAndHop(const KeySelector& begin, - const KeySelector& end, - const Key& hopInfo, - GetRangeLimits limits, - Snapshot = Snapshot::False, - Reverse = Reverse::False); + [[nodiscard]] Future getRangeAndFlatMap(const KeySelector& begin, + const KeySelector& end, + const Key& mapper, + GetRangeLimits limits, + Snapshot = Snapshot::False, + Reverse = Reverse::False); private: - template - Future getRangeMaybeHop(const KeySelector& begin, + template + Future getRangeInternal(const KeySelector& begin, const KeySelector& end, - const Key& hopInfo, + const Key& mapper, GetRangeLimits limits, Snapshot snapshot, Reverse reverse); diff --git a/fdbclient/PaxosConfigTransaction.h b/fdbclient/PaxosConfigTransaction.h index 509e829b452..3854d4be963 100644 --- 
a/fdbclient/PaxosConfigTransaction.h +++ b/fdbclient/PaxosConfigTransaction.h @@ -50,12 +50,12 @@ class PaxosConfigTransaction final : public IConfigTransaction, public FastAlloc GetRangeLimits limits, Snapshot = Snapshot::False, Reverse = Reverse::False) override; - Future getRangeAndHop(KeySelector begin, - KeySelector end, - Key hopInfo, - GetRangeLimits limits, - Snapshot = Snapshot::False, - Reverse = Reverse::False) override { + Future getRangeAndFlatMap(KeySelector begin, + KeySelector end, + Key mapper, + GetRangeLimits limits, + Snapshot = Snapshot::False, + Reverse = Reverse::False) override { throw client_invalid_operation(); } void set(KeyRef const& key, ValueRef const& value) override; diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index cd2501d0ad1..56ce22fd07b 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -75,11 +75,11 @@ class RYWImpl { }; template - struct GetRangeAndHopReq { - GetRangeAndHopReq(KeySelector begin, KeySelector end, Key hopInfo, GetRangeLimits limits) - : begin(begin), end(end), hopInfo(hopInfo), limits(limits) {} + struct GetRangeAndFlatMapReq { + GetRangeAndFlatMapReq(KeySelector begin, KeySelector end, Key mapper, GetRangeLimits limits) + : begin(begin), end(end), mapper(mapper), limits(limits) {} KeySelector begin, end; - Key hopInfo; + Key mapper; GetRangeLimits limits; using Result = RangeResult; }; @@ -214,9 +214,9 @@ class RYWImpl { } ACTOR template - static Future readThroughAndHop(ReadYourWritesTransaction* ryw, - GetRangeAndHopReq read, - Snapshot snapshot) { + static Future readThroughAndFlatMap(ReadYourWritesTransaction* ryw, + GetRangeAndFlatMapReq read, + Snapshot snapshot) { if (backwards && read.end.offset > 1) { // FIXME: Optimistically assume that this will not run into the system keys, and only reissue if the result // actually does. 
@@ -227,8 +227,8 @@ class RYWImpl { read.end = KeySelector(firstGreaterOrEqual(key), key.arena()); } - RangeResult v = wait(ryw->tr.getRangeAndHop( - read.begin, read.end, read.hopInfo, read.limits, snapshot, backwards ? Reverse::True : Reverse::False)); + RangeResult v = wait(ryw->tr.getRangeAndFlatMap( + read.begin, read.end, read.mapper, read.limits, snapshot, backwards ? Reverse::True : Reverse::False)); KeyRef maxKey = ryw->getMaxReadKey(); if (v.size() > 0) { if (!backwards && v[v.size() - 1].key >= maxKey) { @@ -349,11 +349,11 @@ class RYWImpl { } } ACTOR template - static Future readWithConflictRangeThroughAndHop(ReadYourWritesTransaction* ryw, - Req req, - Snapshot snapshot) { + static Future readWithConflictRangeThroughAndFlatMap(ReadYourWritesTransaction* ryw, + Req req, + Snapshot snapshot) { choose { - when(typename Req::Result result = wait(readThroughAndHop(ryw, req, snapshot))) { return result; } + when(typename Req::Result result = wait(readThroughAndFlatMap(ryw, req, snapshot))) { return result; } when(wait(ryw->resetPromise.getFuture())) { throw internal_error(); } } } @@ -394,16 +394,16 @@ class RYWImpl { } template - static inline Future readWithConflictRangeAndHop(ReadYourWritesTransaction* ryw, - Req const& req, - Snapshot snapshot) { + static inline Future readWithConflictRangeAndFlatMap(ReadYourWritesTransaction* ryw, + Req const& req, + Snapshot snapshot) { if (ryw->options.readYourWritesDisabled) { - return readWithConflictRangeThroughAndHop(ryw, req, snapshot); + return readWithConflictRangeThroughAndFlatMap(ryw, req, snapshot); } else if (snapshot && ryw->options.snapshotRywEnabled <= 0) { - TEST(true); // readWithConflictRangeSnapshot not supported for hop + TEST(true); // readWithConflictRangeSnapshot not supported for getRangeAndFlatMap throw client_invalid_operation(); } - TEST(true); // readWithConflictRangeRYW not supported for hop + TEST(true); // readWithConflictRangeRYW not supported for getRangeAndFlatMap throw 
client_invalid_operation(); } @@ -1572,16 +1572,16 @@ Future ReadYourWritesTransaction::getRange(const KeySelector& begin return getRange(begin, end, GetRangeLimits(limit), snapshot, reverse); } -Future ReadYourWritesTransaction::getRangeAndHop(KeySelector begin, - KeySelector end, - Key hopInfo, - GetRangeLimits limits, - Snapshot snapshot, - Reverse reverse) { +Future ReadYourWritesTransaction::getRangeAndFlatMap(KeySelector begin, + KeySelector end, + Key mapper, + GetRangeLimits limits, + Snapshot snapshot, + Reverse reverse) { if (getDatabase()->apiVersionAtLeast(630)) { if (specialKeys.contains(begin.getKey()) && specialKeys.begin <= end.getKey() && end.getKey() <= specialKeys.end) { - TEST(true); // Special key space get range (Hop) + TEST(true); // Special key space get range (FlatMap) throw client_invalid_operation(); // Not support special keys. } } else { @@ -1603,7 +1603,7 @@ Future ReadYourWritesTransaction::getRangeAndHop(KeySelector begin, // This optimization prevents nullptr operations from being added to the conflict range if (limits.isReached()) { - TEST(true); // RYW range read limit 0 (Hop) + TEST(true); // RYW range read limit 0 (FlatMap) return RangeResult(); } @@ -1617,15 +1617,15 @@ Future ReadYourWritesTransaction::getRangeAndHop(KeySelector begin, end.removeOrEqual(end.arena()); if (begin.offset >= end.offset && begin.getKey() >= end.getKey()) { - TEST(true); // RYW range inverted (Hop) + TEST(true); // RYW range inverted (FlatMap) return RangeResult(); } Future result = - reverse ? RYWImpl::readWithConflictRangeAndHop( - this, RYWImpl::GetRangeAndHopReq(begin, end, hopInfo, limits), snapshot) - : RYWImpl::readWithConflictRangeAndHop( - this, RYWImpl::GetRangeAndHopReq(begin, end, hopInfo, limits), snapshot); + reverse ? 
RYWImpl::readWithConflictRangeAndFlatMap( + this, RYWImpl::GetRangeAndFlatMapReq(begin, end, mapper, limits), snapshot) + : RYWImpl::readWithConflictRangeAndFlatMap( + this, RYWImpl::GetRangeAndFlatMapReq(begin, end, mapper, limits), snapshot); reading.add(success(result)); return result; diff --git a/fdbclient/ReadYourWrites.h b/fdbclient/ReadYourWrites.h index d301b094ec1..19ce5c8775b 100644 --- a/fdbclient/ReadYourWrites.h +++ b/fdbclient/ReadYourWrites.h @@ -104,12 +104,12 @@ class ReadYourWritesTransaction final : NonCopyable, snapshot, reverse); } - Future getRangeAndHop(KeySelector begin, - KeySelector end, - Key hopInfo, - GetRangeLimits limits, - Snapshot = Snapshot::False, - Reverse = Reverse::False) override; + Future getRangeAndFlatMap(KeySelector begin, + KeySelector end, + Key mapper, + GetRangeLimits limits, + Snapshot = Snapshot::False, + Reverse = Reverse::False) override; [[nodiscard]] Future>> getAddressesForKey(const Key& key) override; Future>> getRangeSplitPoints(const KeyRange& range, int64_t chunkSize) override; diff --git a/fdbclient/SimpleConfigTransaction.h b/fdbclient/SimpleConfigTransaction.h index ddd39e9e8c6..168b1a6c294 100644 --- a/fdbclient/SimpleConfigTransaction.h +++ b/fdbclient/SimpleConfigTransaction.h @@ -59,12 +59,12 @@ class SimpleConfigTransaction final : public IConfigTransaction, public FastAllo GetRangeLimits limits, Snapshot = Snapshot::False, Reverse = Reverse::False) override; - Future getRangeAndHop(KeySelector begin, - KeySelector end, - Key hopInfo, - GetRangeLimits limits, - Snapshot = Snapshot::False, - Reverse = Reverse::False) override { + Future getRangeAndFlatMap(KeySelector begin, + KeySelector end, + Key mapper, + GetRangeLimits limits, + Snapshot = Snapshot::False, + Reverse = Reverse::False) override { throw client_invalid_operation(); } Future commit() override; diff --git a/fdbclient/StorageServerInterface.cpp b/fdbclient/StorageServerInterface.cpp index 8f2b8637cbf..11c9f46f562 100644 --- 
a/fdbclient/StorageServerInterface.cpp +++ b/fdbclient/StorageServerInterface.cpp @@ -152,22 +152,22 @@ void TSS_traceMismatch(TraceEvent& event, .detail("TSSReply", tssResultsString); } -// range reads and hop +// range reads and flat map template <> -bool TSS_doCompare(const GetKeyValuesAndHopReply& src, const GetKeyValuesAndHopReply& tss) { +bool TSS_doCompare(const GetKeyValuesAndFlatMapReply& src, const GetKeyValuesAndFlatMapReply& tss) { return src.more == tss.more && src.data == tss.data; } template <> -const char* TSS_mismatchTraceName(const GetKeyValuesAndHopRequest& req) { - return "TSSMismatchGetKeyValuesAndHop"; +const char* TSS_mismatchTraceName(const GetKeyValuesAndFlatMapRequest& req) { + return "TSSMismatchGetKeyValuesAndFlatMap"; } template <> void TSS_traceMismatch(TraceEvent& event, - const GetKeyValuesAndHopRequest& req, - const GetKeyValuesAndHopReply& src, - const GetKeyValuesAndHopReply& tss) { + const GetKeyValuesAndFlatMapRequest& req, + const GetKeyValuesAndFlatMapReply& src, + const GetKeyValuesAndFlatMapReply& tss) { std::string ssResultsString = format("(%d)%s:\n", src.data.size(), src.more ? 
"+" : ""); for (auto& it : src.data) { ssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value); @@ -396,9 +396,9 @@ void TSSMetrics::recordLatency(const GetKeyValuesRequest& req, double ssLatency, } template <> -void TSSMetrics::recordLatency(const GetKeyValuesAndHopRequest& req, double ssLatency, double tssLatency) { - SSgetKeyValuesAndHopLatency.addSample(ssLatency); - TSSgetKeyValuesAndHopLatency.addSample(tssLatency); +void TSSMetrics::recordLatency(const GetKeyValuesAndFlatMapRequest& req, double ssLatency, double tssLatency) { + SSgetKeyValuesAndFlatMapLatency.addSample(ssLatency); + TSSgetKeyValuesAndFlatMapLatency.addSample(tssLatency); } template <> diff --git a/fdbclient/StorageServerInterface.h b/fdbclient/StorageServerInterface.h index 4e2af9c482f..ba912adbb1c 100644 --- a/fdbclient/StorageServerInterface.h +++ b/fdbclient/StorageServerInterface.h @@ -66,7 +66,7 @@ struct StorageServerInterface { // Throws a wrong_shard_server if the keys in the request or result depend on data outside this server OR if a large // selector offset prevents all data from being read in one range read RequestStream getKeyValues; - RequestStream getKeyValuesAndHop; + RequestStream getKeyValuesAndFlatMap; RequestStream getShardState; RequestStream waitMetrics; @@ -131,8 +131,8 @@ struct StorageServerInterface { RequestStream(getValue.getEndpoint().getAdjustedEndpoint(15)); changeFeedPop = RequestStream(getValue.getEndpoint().getAdjustedEndpoint(16)); - getKeyValuesAndHop = - RequestStream(getValue.getEndpoint().getAdjustedEndpoint(17)); + getKeyValuesAndFlatMap = + RequestStream(getValue.getEndpoint().getAdjustedEndpoint(17)); } } else { ASSERT(Ar::isDeserializing); @@ -178,7 +178,7 @@ struct StorageServerInterface { streams.push_back(changeFeedStream.getReceiver()); streams.push_back(overlappingChangeFeeds.getReceiver()); streams.push_back(changeFeedPop.getReceiver()); - 
streams.push_back(getKeyValuesAndHop.getReceiver(TaskPriority::LoadBalancedEndpoint)); + streams.push_back(getKeyValuesAndFlatMap.getReceiver(TaskPriority::LoadBalancedEndpoint)); FlowTransport::transport().addEndpoints(streams); } }; @@ -303,7 +303,7 @@ struct GetKeyValuesRequest : TimedRequest { KeySelectorRef begin, end; // This is a dummy field there has never been used. // TODO: Get rid of this by constexpr or other template magic in getRange - KeyRef hopInfo = KeyRef(); + KeyRef mapper = KeyRef(); Version version; // or latestVersion int limit, limitBytes; bool isFetchKeys; @@ -318,16 +318,15 @@ struct GetKeyValuesRequest : TimedRequest { } }; -struct GetKeyValuesAndHopReply : public LoadBalancedReply { +struct GetKeyValuesAndFlatMapReply : public LoadBalancedReply { constexpr static FileIdentifier file_identifier = 1783067; Arena arena; - // The key is the key in the requested range rather than the hop key. VectorRef data; Version version; // useful when latestVersion was requested bool more; bool cached = false; - GetKeyValuesAndHopReply() : version(invalidVersion), more(false), cached(false) {} + GetKeyValuesAndFlatMapReply() : version(invalidVersion), more(false), cached(false) {} template void serialize(Ar& ar) { @@ -335,24 +334,24 @@ struct GetKeyValuesAndHopReply : public LoadBalancedReply { } }; -struct GetKeyValuesAndHopRequest : TimedRequest { +struct GetKeyValuesAndFlatMapRequest : TimedRequest { constexpr static FileIdentifier file_identifier = 6795747; SpanID spanContext; Arena arena; KeySelectorRef begin, end; - KeyRef hopInfo; + KeyRef mapper; Version version; // or latestVersion int limit, limitBytes; bool isFetchKeys; Optional tags; Optional debugID; - ReplyPromise reply; + ReplyPromise reply; - GetKeyValuesAndHopRequest() : isFetchKeys(false) {} + GetKeyValuesAndFlatMapRequest() : isFetchKeys(false) {} template void serialize(Ar& ar) { serializer( - ar, begin, end, hopInfo, version, limit, limitBytes, isFetchKeys, tags, debugID, reply, 
spanContext, arena); + ar, begin, end, mapper, version, limit, limitBytes, isFetchKeys, tags, debugID, reply, spanContext, arena); } }; diff --git a/fdbclient/ThreadSafeTransaction.cpp b/fdbclient/ThreadSafeTransaction.cpp index f03b10d8c2f..ace522cac02 100644 --- a/fdbclient/ThreadSafeTransaction.cpp +++ b/fdbclient/ThreadSafeTransaction.cpp @@ -257,20 +257,20 @@ ThreadFuture ThreadSafeTransaction::getRange(const KeySelectorRef& }); } -ThreadFuture ThreadSafeTransaction::getRangeAndHop(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& hopInfo, - GetRangeLimits limits, - bool snapshot, - bool reverse) { +ThreadFuture ThreadSafeTransaction::getRangeAndFlatMap(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& mapper, + GetRangeLimits limits, + bool snapshot, + bool reverse) { KeySelector b = begin; KeySelector e = end; - Key h = hopInfo; + Key h = mapper; ISingleThreadTransaction* tr = this->tr; return onMainThread([tr, b, e, h, limits, snapshot, reverse]() -> Future { tr->checkDeferredError(); - return tr->getRangeAndHop(b, e, h, limits, Snapshot{ snapshot }, Reverse{ reverse }); + return tr->getRangeAndFlatMap(b, e, h, limits, Snapshot{ snapshot }, Reverse{ reverse }); }); } diff --git a/fdbclient/ThreadSafeTransaction.h b/fdbclient/ThreadSafeTransaction.h index b8ff9b7bfb3..85ae27c1fe9 100644 --- a/fdbclient/ThreadSafeTransaction.h +++ b/fdbclient/ThreadSafeTransaction.h @@ -106,12 +106,12 @@ class ThreadSafeTransaction : public ITransaction, ThreadSafeReferenceCounted getRangeAndHop(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& hopInfo, - GetRangeLimits limits, - bool snapshot, - bool reverse) override; + ThreadFuture getRangeAndFlatMap(const KeySelectorRef& begin, + const KeySelectorRef& end, + const StringRef& mapper, + GetRangeLimits limits, + bool snapshot, + bool reverse) override; ThreadFuture>> getAddressesForKey(const KeyRef& key) override; ThreadFuture> getVersionstamp() 
override; ThreadFuture getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override; diff --git a/fdbrpc/TSSComparison.h b/fdbrpc/TSSComparison.h index f230f30fd86..54114c2db80 100644 --- a/fdbrpc/TSSComparison.h +++ b/fdbrpc/TSSComparison.h @@ -51,12 +51,12 @@ struct TSSMetrics : ReferenceCounted, NonCopyable { ContinuousSample SSgetValueLatency; ContinuousSample SSgetKeyLatency; ContinuousSample SSgetKeyValuesLatency; - ContinuousSample SSgetKeyValuesAndHopLatency; + ContinuousSample SSgetKeyValuesAndFlatMapLatency; ContinuousSample TSSgetValueLatency; ContinuousSample TSSgetKeyLatency; ContinuousSample TSSgetKeyValuesLatency; - ContinuousSample TSSgetKeyValuesAndHopLatency; + ContinuousSample TSSgetKeyValuesAndFlatMapLatency; std::unordered_map ssErrorsByCode; std::unordered_map tssErrorsByCode; @@ -105,8 +105,8 @@ struct TSSMetrics : ReferenceCounted, NonCopyable { : cc("TSSClientMetrics"), requests("Requests", cc), streamComparisons("StreamComparisons", cc), ssErrors("SSErrors", cc), tssErrors("TSSErrors", cc), tssTimeouts("TSSTimeouts", cc), mismatches("Mismatches", cc), SSgetValueLatency(1000), SSgetKeyLatency(1000), SSgetKeyValuesLatency(1000), - SSgetKeyValuesAndHopLatency(1000), TSSgetValueLatency(1000), TSSgetKeyLatency(1000), - TSSgetKeyValuesLatency(1000), TSSgetKeyValuesAndHopLatency(1000) {} + SSgetKeyValuesAndFlatMapLatency(1000), TSSgetValueLatency(1000), TSSgetKeyLatency(1000), + TSSgetKeyValuesLatency(1000), TSSgetKeyValuesAndFlatMapLatency(1000) {} }; template diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index cdec6c3375e..35a8ad0eaf4 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -780,7 +780,7 @@ struct StorageServer { struct Counters { CounterCollection cc; - Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, getRangeAndHopQueries, + Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, getRangeAndFlatMapQueries, 
getRangeStreamQueries, finishedQueries, lowPriorityQueries, rowsQueried, bytesQueried, watchQueries, emptyQueries; @@ -809,8 +809,9 @@ struct StorageServer { Counter wrongShardServer; Counter fetchedVersions; Counter fetchesFromLogs; - // The following counters measure how many of "hop"s in the getRangeAndHopQueries are effective. "Miss" means - // fallback if fallback is enabled, otherwise means failure (so that another layer could implement fallback). + // The following counters measure how many of lookups in the getRangeAndFlatMapQueries are effective. "Miss" + // means fallback if fallback is enabled, otherwise means failure (so that another layer could implement + // fallback). Counter quickGetValueHit, quickGetValueMiss, quickGetKeyValuesHit, quickGetKeyValuesMiss; LatencySample readLatencySample; @@ -819,7 +820,7 @@ struct StorageServer { Counters(StorageServer* self) : cc("StorageServer", self->thisServerID.toString()), allQueries("QueryQueue", cc), getKeyQueries("GetKeyQueries", cc), getValueQueries("GetValueQueries", cc), - getRangeQueries("GetRangeQueries", cc), getRangeAndHopQueries("GetRangeAndHopQueries", cc), + getRangeQueries("GetRangeQueries", cc), getRangeAndFlatMapQueries("GetRangeAndFlatMapQueries", cc), getRangeStreamQueries("GetRangeStreamQueries", cc), finishedQueries("FinishedQueries", cc), lowPriorityQueries("LowPriorityQueries", cc), rowsQueried("RowsQueried", cc), bytesQueried("BytesQueried", cc), watchQueries("WatchQueries", cc), emptyQueries("EmptyQueries", cc), @@ -2020,7 +2021,7 @@ ACTOR Future> quickGetValue(StorageServer* data, StringRef key, state Optional valueOption = wait(valueFuture); return valueOption; } else { - throw hop_quick_get_value_miss(); + throw quick_get_value_miss(); } }; @@ -2540,20 +2541,20 @@ ACTOR Future quickGetKeyValues(StorageServer* data, StringRef prefi RangeResult rangeResult = wait(rangeResultFuture); return rangeResult; } else { - throw hop_quick_get_key_values_miss(); + throw 
quick_get_key_values_miss(); } }; -Key constructHopKey(KeyValueRef* keyValue, Tuple& hopKeyFormatTuple, bool& isRangeQuery) { +Key constructMappedKey(KeyValueRef* keyValue, Tuple& mappedKeyFormatTuple, bool& isRangeQuery) { // Lazily parse key and/or value to tuple because they may not need to be a tuple if not used. Optional keyTuple; Optional valueTuple; - Tuple hopKeyTuple; - for (int i = 0; i < hopKeyFormatTuple.size(); i++) { - Tuple::ElementType type = hopKeyFormatTuple.getType(i); + Tuple mappedKeyTuple; + for (int i = 0; i < mappedKeyFormatTuple.size(); i++) { + Tuple::ElementType type = mappedKeyFormatTuple.getType(i); if (type == Tuple::BYTES || type == Tuple::UTF8) { - std::string s = hopKeyFormatTuple.getString(i).toString(); + std::string s = mappedKeyFormatTuple.getString(i).toString(); auto sz = s.size(); // Handle escape. @@ -2580,7 +2581,7 @@ Key constructHopKey(KeyValueRef* keyValue, Tuple& hopKeyFormatTuple, bool& isRan } if (escaped) { // If the element uses escape, cope the escaped version. - hopKeyTuple.append(s); + mappedKeyTuple.append(s); } // {K[??]} or {V[??]} else if (sz > 5 && s[0] == '{' && (s[1] == 'K' || s[1] == 'V') && s[2] == '[' && s[sz - 2] == ']' && @@ -2589,7 +2590,7 @@ Key constructHopKey(KeyValueRef* keyValue, Tuple& hopKeyFormatTuple, bool& isRan try { idx = std::stoi(s.substr(3, sz - 5)); } catch (std::exception& e) { - throw hop_bad_index(); + throw mapper_bad_index(); } Tuple* referenceTuple; if (s[1] == 'K') { @@ -2612,109 +2613,109 @@ Key constructHopKey(KeyValueRef* keyValue, Tuple& hopKeyFormatTuple, bool& isRan } if (idx < 0 || idx >= referenceTuple->size()) { - throw hop_bad_index(); + throw mapper_bad_index(); } - hopKeyTuple.append(referenceTuple->subTuple(idx, idx + 1)); + mappedKeyTuple.append(referenceTuple->subTuple(idx, idx + 1)); } else if (s == "{...}") { // Range query. 
- if (i != hopKeyFormatTuple.size() - 1) { - // It must be the last element of the hop info tuple - throw hop_bad_range_decriptor(); + if (i != mappedKeyFormatTuple.size() - 1) { + // It must be the last element of the mapper tuple + throw mapper_bad_range_decriptor(); } // Every record will try to set it. It's ugly, but not wrong. isRangeQuery = true; - // Do not add it to the hop key. + // Do not add it to the mapped key. } else { // If the element is a string but neither escaped nor descriptors, just copy it. - hopKeyTuple.append(hopKeyFormatTuple.subTuple(i, i + 1)); + mappedKeyTuple.append(mappedKeyFormatTuple.subTuple(i, i + 1)); } } else { // If the element not a string, just copy it. - hopKeyTuple.append(hopKeyFormatTuple.subTuple(i, i + 1)); + mappedKeyTuple.append(mappedKeyFormatTuple.subTuple(i, i + 1)); } } - return hopKeyTuple.getDataAsStandalone(); + return mappedKeyTuple.getDataAsStandalone(); } -TEST_CASE("/fdbserver/storageserver/constructHopKey") { +TEST_CASE("/fdbserver/storageserver/constructMappedKey") { Key key = Tuple().append("key-0"_sr).append("key-1"_sr).append("key-2"_sr).getDataAsStandalone(); Value value = Tuple().append("value-0"_sr).append("value-1"_sr).append("value-2"_sr).getDataAsStandalone(); state KeyValueRef kvr(key, value); { - Tuple hopInfoTuple = Tuple() - .append("normal"_sr) - .append("{{escaped}}"_sr) - .append("{K[2]}"_sr) - .append("{V[0]}"_sr) - .append("{...}"_sr); + Tuple mapperTuple = Tuple() + .append("normal"_sr) + .append("{{escaped}}"_sr) + .append("{K[2]}"_sr) + .append("{V[0]}"_sr) + .append("{...}"_sr); bool isRangeQuery = false; - Key hopKey = constructHopKey(&kvr, hopInfoTuple, isRangeQuery); - - Key expectedHopKey = Tuple() - .append("normal"_sr) - .append("{escaped}"_sr) - .append("key-2"_sr) - .append("value-0"_sr) - .getDataAsStandalone(); - // std::cout << printable(hopKey) << " == " << printable(expectedHopKey) << std::endl; - ASSERT(hopKey.compare(expectedHopKey) == 0); + Key mappedKey = 
constructMappedKey(&kvr, mapperTuple, isRangeQuery); + + Key expectedMappedKey = Tuple() + .append("normal"_sr) + .append("{escaped}"_sr) + .append("key-2"_sr) + .append("value-0"_sr) + .getDataAsStandalone(); + // std::cout << printable(mappedKey) << " == " << printable(expectedMappedKey) << std::endl; + ASSERT(mappedKey.compare(expectedMappedKey) == 0); ASSERT(isRangeQuery == true); } { - Tuple hopInfoTuple = Tuple().append("{{{{}}"_sr).append("}}"_sr); + Tuple mapperTuple = Tuple().append("{{{{}}"_sr).append("}}"_sr); bool isRangeQuery = false; - Key hopKey = constructHopKey(&kvr, hopInfoTuple, isRangeQuery); + Key mappedKey = constructMappedKey(&kvr, mapperTuple, isRangeQuery); - Key expectedHopKey = Tuple().append("{{}"_sr).append("}"_sr).getDataAsStandalone(); - // std::cout << printable(hopKey) << " == " << printable(expectedHopKey) << std::endl; - ASSERT(hopKey.compare(expectedHopKey) == 0); + Key expectedMappedKey = Tuple().append("{{}"_sr).append("}"_sr).getDataAsStandalone(); + // std::cout << printable(mappedKey) << " == " << printable(expectedMappedKey) << std::endl; + ASSERT(mappedKey.compare(expectedMappedKey) == 0); ASSERT(isRangeQuery == false); } { - Tuple hopInfoTuple = Tuple().append("{{{{}}"_sr).append("}}"_sr); + Tuple mapperTuple = Tuple().append("{{{{}}"_sr).append("}}"_sr); bool isRangeQuery = false; - Key hopKey = constructHopKey(&kvr, hopInfoTuple, isRangeQuery); + Key mappedKey = constructMappedKey(&kvr, mapperTuple, isRangeQuery); - Key expectedHopKey = Tuple().append("{{}"_sr).append("}"_sr).getDataAsStandalone(); - // std::cout << printable(hopKey) << " == " << printable(expectedHopKey) << std::endl; - ASSERT(hopKey.compare(expectedHopKey) == 0); + Key expectedMappedKey = Tuple().append("{{}"_sr).append("}"_sr).getDataAsStandalone(); + // std::cout << printable(mappedKey) << " == " << printable(expectedMappedKey) << std::endl; + ASSERT(mappedKey.compare(expectedMappedKey) == 0); ASSERT(isRangeQuery == false); } { - Tuple hopInfoTuple 
= Tuple().append("{K[100]}"_sr); + Tuple mapperTuple = Tuple().append("{K[100]}"_sr); bool isRangeQuery = false; state bool throwException = false; try { - Key hopKey = constructHopKey(&kvr, hopInfoTuple, isRangeQuery); + Key mappedKey = constructMappedKey(&kvr, mapperTuple, isRangeQuery); } catch (Error& e) { - ASSERT(e.code() == error_code_hop_bad_index); + ASSERT(e.code() == error_code_mapper_bad_index); throwException = true; } ASSERT(throwException); } { - Tuple hopInfoTuple = Tuple().append("{...}"_sr).append("last-element"_sr); + Tuple mapperTuple = Tuple().append("{...}"_sr).append("last-element"_sr); bool isRangeQuery = false; state bool throwException2 = false; try { - Key hopKey = constructHopKey(&kvr, hopInfoTuple, isRangeQuery); + Key mappedKey = constructMappedKey(&kvr, mapperTuple, isRangeQuery); } catch (Error& e) { - ASSERT(e.code() == error_code_hop_bad_range_decriptor); + ASSERT(e.code() == error_code_mapper_bad_range_decriptor); throwException2 = true; } ASSERT(throwException2); } { - Tuple hopInfoTuple = Tuple().append("{K[not-a-number]}"_sr); + Tuple mapperTuple = Tuple().append("{K[not-a-number]}"_sr); bool isRangeQuery = false; state bool throwException3 = false; try { - Key hopKey = constructHopKey(&kvr, hopInfoTuple, isRangeQuery); + Key mappedKey = constructMappedKey(&kvr, mapperTuple, isRangeQuery); } catch (Error& e) { - ASSERT(e.code() == error_code_hop_bad_index); + ASSERT(e.code() == error_code_mapper_bad_index); throwException3 = true; } ASSERT(throwException3); @@ -2722,8 +2723,8 @@ TEST_CASE("/fdbserver/storageserver/constructHopKey") { return Void(); } -ACTOR Future hop(StorageServer* data, GetKeyValuesReply input, StringRef hopInfo) { - state GetKeyValuesAndHopReply result; +ACTOR Future flatMap(StorageServer* data, GetKeyValuesReply input, StringRef mapper) { + state GetKeyValuesAndFlatMapReply result; result.version = input.version; result.more = input.more; result.cached = input.cached; @@ -2731,37 +2732,36 @@ ACTOR Future 
hop(StorageServer* data, GetKeyValuesReply result.data.reserve(result.arena, input.data.size()); state bool isRangeQuery = false; - state Tuple hopKeyFormatTuple = Tuple::unpack(hopInfo); + state Tuple mappedKeyFormatTuple = Tuple::unpack(mapper); state KeyValueRef* it = input.data.begin(); for (; it != input.data.end(); it++) { state StringRef key = it->key; - state Key hopKey = constructHopKey(it, hopKeyFormatTuple, isRangeQuery); - // Make sure the hopKey is always available, so that it's good even we want to get key asynchronously. - result.arena.dependsOn(hopKey.arena()); + state Key mappedKey = constructMappedKey(it, mappedKeyFormatTuple, isRangeQuery); + // Make sure the mappedKey is always available, so that it's good even we want to get key asynchronously. + result.arena.dependsOn(mappedKey.arena()); if (isRangeQuery) { - // Use the hopKey as the prefix of the range query. - RangeResult rangeResult = wait(quickGetKeyValues(data, hopKey, input.version)); + // Use the mappedKey as the prefix of the range query. + RangeResult rangeResult = wait(quickGetKeyValues(data, mappedKey, input.version)); if (rangeResult.more) { // Probably the fan out is too large. The user should use the old way to query. - throw hop_quick_get_key_values_has_more(); + throw quick_get_key_values_has_more(); } result.arena.dependsOn(rangeResult.arena()); for (int i = 0; i < rangeResult.size(); i++) { result.data.emplace_back(result.arena, rangeResult[i].key, rangeResult[i].value); } } else { - Optional valueOption = wait(quickGetValue(data, hopKey, input.version)); + Optional valueOption = wait(quickGetValue(data, mappedKey, input.version)); if (valueOption.present()) { Value value = valueOption.get(); result.arena.dependsOn(value.arena()); - result.data.emplace_back(result.arena, hopKey, value); + result.data.emplace_back(result.arena, mappedKey, value); } else { // TODO: Shall we throw exception if the key doesn't exist or the range is empty? 
- // throw hop_no_such_key(); } } } @@ -2770,17 +2770,17 @@ ACTOR Future hop(StorageServer* data, GetKeyValuesReply // Most of the actor is copied from getKeyValuesQ. I tried to use templates but things become nearly impossible after // combining actor shenanigans with template shenanigans. -ACTOR Future getKeyValuesAndHopQ(StorageServer* data, GetKeyValuesAndHopRequest req) +ACTOR Future getKeyValuesAndFlatMapQ(StorageServer* data, GetKeyValuesAndFlatMapRequest req) // Throws a wrong_shard_server if the keys in the request or result depend on data outside this server OR if a large // selector offset prevents all data from being read in one range read { - state Span span("SS:getKeyValuesAndHop"_loc, { req.spanContext }); + state Span span("SS:getKeyValuesAndFlatMap"_loc, { req.spanContext }); state int64_t resultSize = 0; state IKeyValueStore::ReadType type = req.isFetchKeys ? IKeyValueStore::ReadType::FETCH : IKeyValueStore::ReadType::NORMAL; getCurrentLineage()->modify(&TransactionLineage::txID) = req.spanContext.first(); - ++data->counters.getRangeAndHopQueries; + ++data->counters.getRangeAndFlatMapQueries; ++data->counters.allQueries; ++data->readQueueSizeMetric; data->maxQueryQueue = std::max( @@ -2797,7 +2797,7 @@ ACTOR Future getKeyValuesAndHopQ(StorageServer* data, GetKeyValuesAndHopRe try { if (req.debugID.present()) g_traceBatch.addEvent( - "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndHop.Before"); + "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndFlatMap.Before"); state Version version = wait(waitForVersion(data, req.version, span.context)); state uint64_t changeCounter = data->shardChangeCounter; @@ -2806,16 +2806,16 @@ ACTOR Future getKeyValuesAndHopQ(StorageServer* data, GetKeyValuesAndHopRe if (req.debugID.present()) g_traceBatch.addEvent( - "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndHop.AfterVersion"); + "TransactionDebug", req.debugID.get().first(), 
"storageserver.getKeyValuesAndFlatMap.AfterVersion"); //.detail("ShardBegin", shard.begin).detail("ShardEnd", shard.end); //} catch (Error& e) { TraceEvent("WrongShardServer", data->thisServerID).detail("Begin", // req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("Shard", - //"None").detail("In", "getKeyValuesAndHop>getShardKeyRange"); throw e; } + //"None").detail("In", "getKeyValuesAndFlatMap>getShardKeyRange"); throw e; } if (!selectorInRange(req.end, shard) && !(req.end.isFirstGreaterOrEqual() && req.end.getKey() == shard.end)) { // TraceEvent("WrongShardServer1", data->thisServerID).detail("Begin", // req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("ShardBegin", - // shard.begin).detail("ShardEnd", shard.end).detail("In", "getKeyValuesAndHop>checkShardExtents"); + // shard.begin).detail("ShardEnd", shard.end).detail("In", "getKeyValuesAndFlatMap>checkShardExtents"); throw wrong_shard_server(); } @@ -2832,7 +2832,7 @@ ACTOR Future getKeyValuesAndHopQ(StorageServer* data, GetKeyValuesAndHopRe if (req.debugID.present()) g_traceBatch.addEvent( - "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndHop.AfterKeys"); + "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndFlatMap.AfterKeys"); //.detail("Off1",offset1).detail("Off2",offset2).detail("ReqBegin",req.begin.getKey()).detail("ReqEnd",req.end.getKey()); // Offsets of zero indicate begin/end keys in this shard, which obviously means we can answer the query @@ -2840,22 +2840,22 @@ ACTOR Future getKeyValuesAndHopQ(StorageServer* data, GetKeyValuesAndHopRe // end the last actual key returned must be from this shard. 
A begin offset of 1 is also OK because then either // begin is past end or equal to end (so the result is definitely empty) if ((offset1 && offset1 != 1) || (offset2 && offset2 != 1)) { - TEST(true); // wrong_shard_server due to offset in getKeyValuesWithHopQ + TEST(true); // wrong_shard_server due to offset in getKeyValuesAndFlatMapQ // We could detect when offset1 takes us off the beginning of the database or offset2 takes us off the end, // and return a clipped range rather than an error (since that is what the NativeAPI.getRange will do anyway // via its "slow path"), but we would have to add some flags to the response to encode whether we went off // the beginning and the end, since it needs that information. - //TraceEvent("WrongShardServer2", data->thisServerID).detail("Begin", req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("ShardBegin", shard.begin).detail("ShardEnd", shard.end).detail("In", "getKeyValuesAndHop>checkOffsets").detail("BeginKey", begin).detail("EndKey", end).detail("BeginOffset", offset1).detail("EndOffset", offset2); + //TraceEvent("WrongShardServer2", data->thisServerID).detail("Begin", req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("ShardBegin", shard.begin).detail("ShardEnd", shard.end).detail("In", "getKeyValuesAndFlatMap>checkOffsets").detail("BeginKey", begin).detail("EndKey", end).detail("BeginOffset", offset1).detail("EndOffset", offset2); throw wrong_shard_server(); } if (begin >= end) { if (req.debugID.present()) g_traceBatch.addEvent( - "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndHop.Send"); + "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndFlatMap.Send"); //.detail("Begin",begin).detail("End",end); - GetKeyValuesAndHopReply none; + GetKeyValuesAndFlatMapReply none; none.version = version; none.more = false; none.penalty = data->getPenalty(); @@ -2870,22 +2870,23 @@ ACTOR Future 
getKeyValuesAndHopQ(StorageServer* data, GetKeyValuesAndHopRe GetKeyValuesReply _r = wait( readRange(data, version, KeyRangeRef(begin, end), req.limit, &remainingLimitBytes, span.context, type)); - // Hop!!! - state GetKeyValuesAndHopReply r = wait(hop(data, _r, req.hopInfo)); + // Map the scanned range to another list of keys and look up. + state GetKeyValuesAndFlatMapReply r = wait(flatMap(data, _r, req.mapper)); if (req.debugID.present()) - g_traceBatch.addEvent( - "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndHop.AfterReadRange"); + g_traceBatch.addEvent("TransactionDebug", + req.debugID.get().first(), + "storageserver.getKeyValuesAndFlatMap.AfterReadRange"); //.detail("Begin",begin).detail("End",end).detail("SizeOf",r.data.size()); data->checkChangeCounter( changeCounter, KeyRangeRef(std::min(begin, std::min(req.begin.getKey(), req.end.getKey())), std::max(end, std::max(req.begin.getKey(), req.end.getKey())))); if (EXPENSIVE_VALIDATION) { - // TODO: Only hop keys are returned, which are not supposed to be in the range. + // TODO: Only mapped keys are returned, which are not supposed to be in the range. // for (int i = 0; i < r.data.size(); i++) // ASSERT(r.data[i].key >= begin && r.data[i].key < end); - // TODO: GetKeyValuesWithHopRequest doesn't respect limit yet. + // TODO: GetKeyValuesWithFlatMapRequest doesn't respect limit yet. // ASSERT(r.data.size() <= std::abs(req.limit)); } @@ -6163,16 +6164,17 @@ ACTOR Future serveGetKeyValuesRequests(StorageServer* self, FutureStream serveGetKeyValuesAndHopRequests(StorageServer* self, - FutureStream getKeyValuesAndHop) { +ACTOR Future serveGetKeyValuesAndFlatMapRequests( + StorageServer* self, + FutureStream getKeyValuesAndFlatMap) { // TODO: Is it fine to keep TransactionLineage::Operation::GetKeyValues here? 
getCurrentLineage()->modify(&TransactionLineage::operation) = TransactionLineage::Operation::GetKeyValues; loop { - GetKeyValuesAndHopRequest req = waitNext(getKeyValuesAndHop); + GetKeyValuesAndFlatMapRequest req = waitNext(getKeyValuesAndFlatMap); // Warning: This code is executed at extremely high priority (TaskPriority::LoadBalancedEndpoint), so downgrade // before doing real work - self->actors.add(self->readGuard(req, getKeyValuesAndHopQ)); + self->actors.add(self->readGuard(req, getKeyValuesAndFlatMapQ)); } } @@ -6375,7 +6377,7 @@ ACTOR Future storageServerCore(StorageServer* self, StorageServerInterface self->actors.add(checkBehind(self)); self->actors.add(serveGetValueRequests(self, ssi.getValue.getFuture())); self->actors.add(serveGetKeyValuesRequests(self, ssi.getKeyValues.getFuture())); - self->actors.add(serveGetKeyValuesAndHopRequests(self, ssi.getKeyValuesAndHop.getFuture())); + self->actors.add(serveGetKeyValuesAndFlatMapRequests(self, ssi.getKeyValuesAndFlatMap.getFuture())); self->actors.add(serveGetKeyValuesStreamRequests(self, ssi.getKeyValuesStream.getFuture())); self->actors.add(serveGetKeyRequests(self, ssi.getKey.getFuture())); self->actors.add(serveWatchValueRequests(self, ssi.watchValue.getFuture())); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 5c8981b34d3..1e50036fd87 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -1097,7 +1097,7 @@ ACTOR Future storageServerRollbackRebooter(std::set storageServerRollbackRebooter(std::set(), Reference(nullptr)); @@ -1480,7 +1480,7 @@ ACTOR Future workerServer(Reference connRecord, DUMPTOKEN(recruited.getKeyValueStoreType); DUMPTOKEN(recruited.watchValue); DUMPTOKEN(recruited.getKeyValuesStream); - DUMPTOKEN(recruited.getKeyValuesAndHop); + DUMPTOKEN(recruited.getKeyValuesAndFlatMap); Promise recovery; Future f = storageServer(kv, recruited, dbInfo, folder, recovery, connRecord); @@ -1577,7 +1577,7 @@ ACTOR Future workerServer(Reference 
connRecord, DUMPTOKEN(recruited.getValue); DUMPTOKEN(recruited.getKey); DUMPTOKEN(recruited.getKeyValues); - DUMPTOKEN(recruited.getKeyValuesAndHop); + DUMPTOKEN(recruited.getKeyValuesAndFlatMap); DUMPTOKEN(recruited.getShardState); DUMPTOKEN(recruited.waitMetrics); DUMPTOKEN(recruited.splitMetrics); @@ -1935,7 +1935,7 @@ ACTOR Future workerServer(Reference connRecord, DUMPTOKEN(recruited.getKeyValueStoreType); DUMPTOKEN(recruited.watchValue); DUMPTOKEN(recruited.getKeyValuesStream); - DUMPTOKEN(recruited.getKeyValuesAndHop); + DUMPTOKEN(recruited.getKeyValuesAndFlatMap); // printf("Recruited as storageServer\n"); std::string filename = diff --git a/fdbserver/workloads/IndexPrefetchDemo.actor.cpp b/fdbserver/workloads/IndexPrefetchDemo.actor.cpp index 079cf416644..a614e80462d 100644 --- a/fdbserver/workloads/IndexPrefetchDemo.actor.cpp +++ b/fdbserver/workloads/IndexPrefetchDemo.actor.cpp @@ -97,16 +97,17 @@ struct IndexPrefetchDemoWorkload : TestWorkload { return Void(); } - ACTOR Future scanRangeAndHop(Database cx, KeyRange range, Key hopInfo) { - std::cout << "start scanRangeAndHop " << range.toString() << std::endl; + ACTOR Future scanRangeAndFlatMap(Database cx, KeyRange range, Key mapper) { + std::cout << "start scanRangeAndFlatMap " << range.toString() << std::endl; // TODO: When n is large, split into multiple transactions. 
state Transaction tr(cx); try { tr.reset(); - RangeResult result = wait(tr.getRangeAndHop(KeySelector(firstGreaterOrEqual(range.begin), range.arena()), - KeySelector(firstGreaterOrEqual(range.end), range.arena()), - hopInfo, - GetRangeLimits(CLIENT_KNOBS->TOO_MANY))); + RangeResult result = + wait(tr.getRangeAndFlatMap(KeySelector(firstGreaterOrEqual(range.begin), range.arena()), + KeySelector(firstGreaterOrEqual(range.end), range.arena()), + mapper, + GetRangeLimits(CLIENT_KNOBS->TOO_MANY))); showResult(result); // result size: 2 // key=\x01prefix\x00\x01RECORD\x00\x01primary-key-of-record-2\x00, value=\x01data-of-record-2\x00 @@ -114,7 +115,7 @@ struct IndexPrefetchDemoWorkload : TestWorkload { } catch (Error& e) { wait(tr.onError(e)); } - std::cout << "finished scanRangeAndHop" << std::endl; + std::cout << "finished scanRangeAndFlatMap" << std::endl; return Void(); } @@ -129,10 +130,10 @@ struct IndexPrefetchDemoWorkload : TestWorkload { state KeyRange someIndexes = KeyRangeRef(someIndexesBegin, someIndexesEnd); wait(self->scanRange(cx, someIndexes)); - Tuple hopInfoTuple; - hopInfoTuple << prefix << RECORD << "{K[3]}"_sr; - Key hopInfo = hopInfoTuple.getDataAsStandalone(); - wait(self->scanRangeAndHop(cx, someIndexes, hopInfo)); + Tuple mapperTuple; + mapperTuple << prefix << RECORD << "{K[3]}"_sr; + Key mapper = mapperTuple.getDataAsStandalone(); + wait(self->scanRangeAndFlatMap(cx, someIndexes, mapper)); return Void(); } diff --git a/flow/error_definitions.h b/flow/error_definitions.h index 6e969d2a2ea..5e815c57987 100755 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -159,12 +159,12 @@ ERROR( blocked_from_network_thread, 2026, "Detected a deadlock in a callback cal ERROR( invalid_config_db_range_read, 2027, "Invalid configuration database range read" ) ERROR( invalid_config_db_key, 2028, "Invalid configuration database key provided" ) ERROR( invalid_config_path, 2029, "Invalid configuration path" ) -ERROR( hop_bad_index, 2030, "The index in 
K[] or V[] is not a valid number or out of range" ) -ERROR( hop_no_such_key, 2031, "A hop key is not set in database" ) -ERROR( hop_bad_range_decriptor, 2032, "\"{...}\" must be the last element of the hop info tuple" ) -ERROR( hop_quick_get_key_values_has_more, 2033, "One of the secondary range queries is too large" ) -ERROR( hop_quick_get_value_miss, 2034, "Find a hop key that is not served in the same SS" ) -ERROR( hop_quick_get_key_values_miss, 2035, "Find a hop range that is not served in the same SS" ) +ERROR( mapper_bad_index, 2030, "The index in K[] or V[] is not a valid number or out of range" ) +ERROR( mapper_no_such_key, 2031, "A mapped key is not set in database" ) +ERROR( mapper_bad_range_decriptor, 2032, "\"{...}\" must be the last element of the mapper tuple" ) +ERROR( quick_get_key_values_has_more, 2033, "One of the mapped range queries is too large" ) +ERROR( quick_get_value_miss, 2034, "Found a mapped key that is not served in the same SS" ) +ERROR( quick_get_key_values_miss, 2035, "Found a mapped range that is not served in the same SS" ) ERROR( incompatible_protocol_version, 2100, "Incompatible protocol version" ) ERROR( transaction_too_large, 2101, "Transaction exceeds byte limit" ) From 586cc3b102c323b0c3950f7c70f5ab1791c1bae1 Mon Sep 17 00:00:00 2001 From: Tao Lin Date: Thu, 4 Nov 2021 08:46:56 -0700 Subject: [PATCH 52/69] Revert "Introduce GetRangeAndFlatMap to push computations down to FDB" --- bindings/c/fdb_c.cpp | 93 +--- bindings/c/foundationdb/fdb_c.h | 18 - bindings/c/test/unit/fdb_api.cpp | 35 -- bindings/c/test/unit/fdb_api.hpp | 18 - bindings/c/test/unit/unit_tests.cpp | 162 +----- bindings/java/fdbJNI.cpp | 70 --- .../RangeAndFlatMapQueryIntegrationTest.java | 253 --------- .../foundationdb/FakeFDBTransaction.java | 7 +- .../apple/foundationdb/FDBTransaction.java | 44 +- .../com/apple/foundationdb/RangeQuery.java | 24 +- .../apple/foundationdb/ReadTransaction.java | 36 -- bindings/java/src/tests.cmake | 1 - 
.../release-notes/release-notes-700.rst | 3 - fdbclient/DatabaseContext.h | 1 - fdbclient/IClientApi.h | 6 - fdbclient/ISingleThreadTransaction.h | 6 - fdbclient/MultiVersionTransaction.actor.cpp | 48 -- fdbclient/MultiVersionTransaction.h | 29 - fdbclient/NativeAPI.actor.cpp | 214 +++---- fdbclient/NativeAPI.actor.h | 17 - fdbclient/PaxosConfigTransaction.h | 8 - fdbclient/ReadYourWrites.actor.cpp | 122 ---- fdbclient/ReadYourWrites.h | 6 - fdbclient/ServerKnobs.cpp | 2 - fdbclient/ServerKnobs.h | 2 - fdbclient/SimpleConfigTransaction.h | 8 - fdbclient/StorageServerInterface.cpp | 45 -- fdbclient/StorageServerInterface.h | 45 -- fdbclient/ThreadSafeTransaction.cpp | 17 - fdbclient/ThreadSafeTransaction.h | 6 - fdbrpc/TSSComparison.h | 5 +- fdbserver/CMakeLists.txt | 1 - fdbserver/storageserver.actor.cpp | 525 +----------------- fdbserver/worker.actor.cpp | 5 - .../workloads/IndexPrefetchDemo.actor.cpp | 145 ----- flow/Platform.h | 1 - flow/error_definitions.h | 6 - tests/CMakeLists.txt | 1 - tests/fast/IndexPrefetchDemo.toml | 6 - 39 files changed, 130 insertions(+), 1911 deletions(-) delete mode 100644 bindings/java/src/integration/com/apple/foundationdb/RangeAndFlatMapQueryIntegrationTest.java delete mode 100644 fdbserver/workloads/IndexPrefetchDemo.actor.cpp delete mode 100644 tests/fast/IndexPrefetchDemo.toml diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index e5545251f2f..ecb78e4df7a 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -436,12 +436,21 @@ extern "C" DLLEXPORT FDBFuture* fdb_transaction_get_addresses_for_key(FDBTransac return (FDBFuture*)(TXN(tr)->getAddressesForKey(KeyRef(key_name, key_name_length)).extractPtr()); } -// Set to the actual limit, target_bytes, and reverse. 
-FDBFuture* validate_and_update_parameters(int& limit, - int& target_bytes, +FDBFuture* fdb_transaction_get_range_impl(FDBTransaction* tr, + uint8_t const* begin_key_name, + int begin_key_name_length, + fdb_bool_t begin_or_equal, + int begin_offset, + uint8_t const* end_key_name, + int end_key_name_length, + fdb_bool_t end_or_equal, + int end_offset, + int limit, + int target_bytes, FDBStreamingMode mode, int iteration, - fdb_bool_t& reverse) { + fdb_bool_t snapshot, + fdb_bool_t reverse) { /* This method may be called with a runtime API version of 13, in which negative row limits are a reverse range read */ if (g_api_version <= 13 && limit < 0) { @@ -491,27 +500,6 @@ FDBFuture* validate_and_update_parameters(int& limit, else if (mode_bytes != GetRangeLimits::BYTE_LIMIT_UNLIMITED) target_bytes = std::min(target_bytes, mode_bytes); - return nullptr; -} - -FDBFuture* fdb_transaction_get_range_impl(FDBTransaction* tr, - uint8_t const* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - uint8_t const* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse) { - FDBFuture* r = validate_and_update_parameters(limit, target_bytes, mode, iteration, reverse); - if (r != nullptr) - return r; return ( FDBFuture*)(TXN(tr) ->getRange( @@ -523,60 +511,6 @@ FDBFuture* fdb_transaction_get_range_impl(FDBTransaction* tr, .extractPtr()); } -FDBFuture* fdb_transaction_get_range_and_flat_map_impl(FDBTransaction* tr, - uint8_t const* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - uint8_t const* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - uint8_t const* mapper_name, - int mapper_name_length, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t 
reverse) { - FDBFuture* r = validate_and_update_parameters(limit, target_bytes, mode, iteration, reverse); - if (r != nullptr) - return r; - return ( - FDBFuture*)(TXN(tr) - ->getRangeAndFlatMap( - KeySelectorRef(KeyRef(begin_key_name, begin_key_name_length), begin_or_equal, begin_offset), - KeySelectorRef(KeyRef(end_key_name, end_key_name_length), end_or_equal, end_offset), - StringRef(mapper_name, mapper_name_length), - GetRangeLimits(limit, target_bytes), - snapshot, - reverse) - .extractPtr()); -} - -// TODO: Support FDB_API_ADDED in generate_asm.py and then this can be replaced with fdb_api_ptr_unimpl. -FDBFuture* fdb_transaction_get_range_and_flat_map_v699(FDBTransaction* tr, - uint8_t const* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - uint8_t const* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - uint8_t const* mapper_name, - int mapper_name_length, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse) { - fprintf(stderr, "UNIMPLEMENTED FDB API FUNCTION\n"); - abort(); -} - FDBFuture* fdb_transaction_get_range_selector_v13(FDBTransaction* tr, uint8_t const* begin_key_name, int begin_key_name_length, @@ -768,7 +702,6 @@ extern "C" DLLEXPORT fdb_error_t fdb_select_api_version_impl(int runtime_version // WARNING: use caution when implementing removed functions by calling public API functions. This can lead to // undesired behavior when using the multi-version API. Instead, it is better to have both the removed and public // functions call an internal implementation function. See fdb_create_database_impl for an example. 
- FDB_API_CHANGED(fdb_transaction_get_range_and_flat_map, 700); FDB_API_REMOVED(fdb_future_get_version, 620); FDB_API_REMOVED(fdb_create_cluster, 610); FDB_API_REMOVED(fdb_cluster_create_database, 610); diff --git a/bindings/c/foundationdb/fdb_c.h b/bindings/c/foundationdb/fdb_c.h index d3c65537c52..81bf10d8a8e 100644 --- a/bindings/c/foundationdb/fdb_c.h +++ b/bindings/c/foundationdb/fdb_c.h @@ -244,24 +244,6 @@ DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_range(FDBTransaction fdb_bool_t reverse); #endif -DLLEXPORT WARN_UNUSED_RESULT FDBFuture* fdb_transaction_get_range_and_flat_map(FDBTransaction* tr, - uint8_t const* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - uint8_t const* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - uint8_t const* mapper_name, - int mapper_name_length, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse); - DLLEXPORT void fdb_transaction_set(FDBTransaction* tr, uint8_t const* key_name, int key_name_length, diff --git a/bindings/c/test/unit/fdb_api.cpp b/bindings/c/test/unit/fdb_api.cpp index f15db95c62c..e59085eeb97 100644 --- a/bindings/c/test/unit/fdb_api.cpp +++ b/bindings/c/test/unit/fdb_api.cpp @@ -193,41 +193,6 @@ KeyValueArrayFuture Transaction::get_range(const uint8_t* begin_key_name, reverse)); } -KeyValueArrayFuture Transaction::get_range_and_flat_map(const uint8_t* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - const uint8_t* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - const uint8_t* mapper_name, - int mapper_name_length, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse) { - return KeyValueArrayFuture(fdb_transaction_get_range_and_flat_map(tr_, - begin_key_name, - begin_key_name_length, - 
begin_or_equal, - begin_offset, - end_key_name, - end_key_name_length, - end_or_equal, - end_offset, - mapper_name, - mapper_name_length, - limit, - target_bytes, - mode, - iteration, - snapshot, - reverse)); -} - EmptyFuture Transaction::watch(std::string_view key) { return EmptyFuture(fdb_transaction_watch(tr_, (const uint8_t*)key.data(), key.size())); } diff --git a/bindings/c/test/unit/fdb_api.hpp b/bindings/c/test/unit/fdb_api.hpp index fb1304a26ee..17f25d55ee4 100644 --- a/bindings/c/test/unit/fdb_api.hpp +++ b/bindings/c/test/unit/fdb_api.hpp @@ -219,24 +219,6 @@ class Transaction final { fdb_bool_t snapshot, fdb_bool_t reverse); - // Returns a future which will be set to an FDBKeyValue array. - KeyValueArrayFuture get_range_and_flat_map(const uint8_t* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - const uint8_t* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - const uint8_t* mapper_name, - int mapper_name_length, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse); - // Wrapper around fdb_transaction_watch. Returns a future representing an // empty value. EmptyFuture watch(std::string_view key); diff --git a/bindings/c/test/unit/unit_tests.cpp b/bindings/c/test/unit/unit_tests.cpp index f59c7f953bc..fe88e6b96f2 100644 --- a/bindings/c/test/unit/unit_tests.cpp +++ b/bindings/c/test/unit/unit_tests.cpp @@ -40,7 +40,6 @@ #define DOCTEST_CONFIG_IMPLEMENT #include "doctest.h" #include "fdbclient/rapidjson/document.h" -#include "fdbclient/Tuple.h" #include "flow/config.h" @@ -77,7 +76,7 @@ fdb_error_t wait_future(fdb::Future& f) { // Given a string s, returns the "lowest" string greater than any string that // starts with s. Taken from // https://github.com/apple/foundationdb/blob/e7d72f458c6a985fdfa677ae021f357d6f49945b/flow/flow.cpp#L223. 
-std::string strinc_str(const std::string& s) { +std::string strinc(const std::string& s) { int index = -1; for (index = s.size() - 1; index >= 0; --index) { if ((uint8_t)s[index] != 255) { @@ -93,16 +92,16 @@ std::string strinc_str(const std::string& s) { return r; } -TEST_CASE("strinc_str") { - CHECK(strinc_str("a").compare("b") == 0); - CHECK(strinc_str("y").compare("z") == 0); - CHECK(strinc_str("!").compare("\"") == 0); - CHECK(strinc_str("*").compare("+") == 0); - CHECK(strinc_str("fdb").compare("fdc") == 0); - CHECK(strinc_str("foundation database 6").compare("foundation database 7") == 0); +TEST_CASE("strinc") { + CHECK(strinc("a").compare("b") == 0); + CHECK(strinc("y").compare("z") == 0); + CHECK(strinc("!").compare("\"") == 0); + CHECK(strinc("*").compare("+") == 0); + CHECK(strinc("fdb").compare("fdc") == 0); + CHECK(strinc("foundation database 6").compare("foundation database 7") == 0); char terminated[] = { 'a', 'b', '\xff' }; - CHECK(strinc_str(std::string(terminated, 3)).compare("ac") == 0); + CHECK(strinc(std::string(terminated, 3)).compare("ac") == 0); } // Helper function to add `prefix` to all keys in the given map. 
Returns a new @@ -118,7 +117,7 @@ std::map create_data(std::map& data) { fdb::Transaction tr(db); - auto end_key = strinc_str(prefix); + auto end_key = strinc(prefix); while (1) { tr.clear_range(prefix, end_key); for (const auto& [key, val] : data) { @@ -225,59 +224,6 @@ GetRangeResult get_range(fdb::Transaction& tr, return GetRangeResult{ results, out_more != 0, 0 }; } -GetRangeResult get_range_and_flat_map(fdb::Transaction& tr, - const uint8_t* begin_key_name, - int begin_key_name_length, - fdb_bool_t begin_or_equal, - int begin_offset, - const uint8_t* end_key_name, - int end_key_name_length, - fdb_bool_t end_or_equal, - int end_offset, - const uint8_t* mapper_name, - int mapper_name_length, - int limit, - int target_bytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse) { - fdb::KeyValueArrayFuture f1 = tr.get_range_and_flat_map(begin_key_name, - begin_key_name_length, - begin_or_equal, - begin_offset, - end_key_name, - end_key_name_length, - end_or_equal, - end_offset, - mapper_name, - mapper_name_length, - limit, - target_bytes, - mode, - iteration, - snapshot, - reverse); - - fdb_error_t err = wait_future(f1); - if (err) { - return GetRangeResult{ {}, false, err }; - } - - const FDBKeyValue* out_kv; - int out_count; - fdb_bool_t out_more; - fdb_check(f1.get(&out_kv, &out_count, &out_more)); - - std::vector> results; - for (int i = 0; i < out_count; ++i) { - std::string key((const char*)out_kv[i].key, out_kv[i].key_length); - std::string value((const char*)out_kv[i].value, out_kv[i].value_length); - results.emplace_back(key, value); - } - return GetRangeResult{ results, out_more != 0, 0 }; -} - // Clears all data in the database. 
void clear_data(FDBDatabase* db) { insert_data(db, {}); @@ -873,86 +819,6 @@ TEST_CASE("fdb_transaction_set_read_version future_version") { CHECK(err == 1009); // future_version } -const std::string EMPTY = Tuple().pack().toString(); -const KeyRef RECORD = "RECORD"_sr; -const KeyRef INDEX = "INDEX"_sr; -static KeyRef primaryKey(const int i) { - return KeyRef(format("primary-key-of-record-%08d", i)); -} -static KeyRef indexKey(const int i) { - return KeyRef(format("index-key-of-record-%08d", i)); -} -static ValueRef dataOfRecord(const int i) { - return KeyRef(format("data-of-record-%08d", i)); -} -static std::string indexEntryKey(const int i) { - return Tuple().append(prefix).append(INDEX).append(indexKey(i)).append(primaryKey(i)).pack().toString(); -} -static std::string recordKey(const int i) { - return Tuple().append(prefix).append(RECORD).append(primaryKey(i)).pack().toString(); -} -static std::string recordValue(const int i) { - return Tuple().append(dataOfRecord(i)).pack().toString(); -} - -TEST_CASE("fdb_transaction_get_range_and_flat_map") { - // Note: The user requested `prefix` should be added as the first element of the tuple that forms the key, rather - // than the prefix of the key. So we don't use key() or create_data() in this test. - std::map data; - for (int i = 0; i < 3; i++) { - data[indexEntryKey(i)] = EMPTY; - data[recordKey(i)] = recordValue(i); - } - insert_data(db, data); - - std::string mapper = Tuple().append(prefix).append(RECORD).append("{K[3]}"_sr).pack().toString(); - - fdb::Transaction tr(db); - // get_range_and_flat_map is only support without RYW. This is a must!!! 
- fdb_check(tr.set_option(FDB_TR_OPTION_READ_YOUR_WRITES_DISABLE, nullptr, 0)); - while (1) { - auto result = get_range_and_flat_map( - tr, - // [0, 1] - FDB_KEYSEL_FIRST_GREATER_OR_EQUAL((const uint8_t*)indexEntryKey(0).c_str(), indexEntryKey(0).size()), - FDB_KEYSEL_FIRST_GREATER_THAN((const uint8_t*)indexEntryKey(1).c_str(), indexEntryKey(1).size()), - (const uint8_t*)mapper.c_str(), - mapper.size(), - /* limit */ 0, - /* target_bytes */ 0, - /* FDBStreamingMode */ FDB_STREAMING_MODE_WANT_ALL, - /* iteration */ 0, - /* snapshot */ false, - /* reverse */ 0); - - if (result.err) { - fdb::EmptyFuture f1 = tr.on_error(result.err); - fdb_check(wait_future(f1)); - continue; - } - - // Only the first 2 records are supposed to be returned. - if (result.kvs.size() < 2) { - CHECK(result.more); - // Retry. - continue; - } - - CHECK(result.kvs.size() == 2); - CHECK(!result.more); - for (int i = 0; i < 2; i++) { - const auto& [key, value] = result.kvs[i]; - std::cout << "result[" << i << "]: key=" << key << ", value=" << value << std::endl; - // OUTPUT: - // result[0]: key=fdbRECORDprimary-key-of-record-00000000, value=data-of-record-00000000 - // result[1]: key=fdbRECORDprimary-key-of-record-00000001, value=data-of-record-00000001 - CHECK(recordKey(i).compare(key) == 0); - CHECK(recordValue(i).compare(value) == 0); - } - break; - } -} - TEST_CASE("fdb_transaction_get_range reverse") { std::map data = create_data({ { "a", "1" }, { "b", "2" }, { "c", "3" }, { "d", "4" } }); insert_data(db, data); @@ -1860,7 +1726,7 @@ TEST_CASE("fdb_transaction_add_conflict_range") { fdb::Transaction tr2(db); while (1) { - fdb_check(tr2.add_conflict_range(key("a"), strinc_str(key("a")), FDB_CONFLICT_RANGE_TYPE_WRITE)); + fdb_check(tr2.add_conflict_range(key("a"), strinc(key("a")), FDB_CONFLICT_RANGE_TYPE_WRITE)); fdb::EmptyFuture f1 = tr2.commit(); fdb_error_t err = wait_future(f1); @@ -1873,8 +1739,8 @@ TEST_CASE("fdb_transaction_add_conflict_range") { } while (1) { - 
fdb_check(tr.add_conflict_range(key("a"), strinc_str(key("a")), FDB_CONFLICT_RANGE_TYPE_READ)); - fdb_check(tr.add_conflict_range(key("a"), strinc_str(key("a")), FDB_CONFLICT_RANGE_TYPE_WRITE)); + fdb_check(tr.add_conflict_range(key("a"), strinc(key("a")), FDB_CONFLICT_RANGE_TYPE_READ)); + fdb_check(tr.add_conflict_range(key("a"), strinc(key("a")), FDB_CONFLICT_RANGE_TYPE_WRITE)); fdb::EmptyFuture f1 = tr.commit(); fdb_error_t err = wait_future(f1); @@ -2351,7 +2217,7 @@ TEST_CASE("commit_does_not_reset") { continue; } - fdb_check(tr2.add_conflict_range(key("foo"), strinc_str(key("foo")), FDB_CONFLICT_RANGE_TYPE_READ)); + fdb_check(tr2.add_conflict_range(key("foo"), strinc(key("foo")), FDB_CONFLICT_RANGE_TYPE_READ)); tr2.set(key("foo"), "bar"); fdb::EmptyFuture tr2CommitFuture = tr2.commit(); err = wait_future(tr2CommitFuture); diff --git a/bindings/java/fdbJNI.cpp b/bindings/java/fdbJNI.cpp index 2a4ba668beb..587190d3a5e 100644 --- a/bindings/java/fdbJNI.cpp +++ b/bindings/java/fdbJNI.cpp @@ -756,76 +756,6 @@ JNIEXPORT jlong JNICALL Java_com_apple_foundationdb_FDBTransaction_Transaction_1 return (jlong)f; } -JNIEXPORT jlong JNICALL -Java_com_apple_foundationdb_FDBTransaction_Transaction_1getRangeAndFlatMap(JNIEnv* jenv, - jobject, - jlong tPtr, - jbyteArray keyBeginBytes, - jboolean orEqualBegin, - jint offsetBegin, - jbyteArray keyEndBytes, - jboolean orEqualEnd, - jint offsetEnd, - jbyteArray mapperBytes, - jint rowLimit, - jint targetBytes, - jint streamingMode, - jint iteration, - jboolean snapshot, - jboolean reverse) { - if (!tPtr || !keyBeginBytes || !keyEndBytes || !mapperBytes) { - throwParamNotNull(jenv); - return 0; - } - FDBTransaction* tr = (FDBTransaction*)tPtr; - - uint8_t* barrBegin = (uint8_t*)jenv->GetByteArrayElements(keyBeginBytes, JNI_NULL); - if (!barrBegin) { - if (!jenv->ExceptionOccurred()) - throwRuntimeEx(jenv, "Error getting handle to native resources"); - return 0; - } - - uint8_t* barrEnd = 
(uint8_t*)jenv->GetByteArrayElements(keyEndBytes, JNI_NULL); - if (!barrEnd) { - jenv->ReleaseByteArrayElements(keyBeginBytes, (jbyte*)barrBegin, JNI_ABORT); - if (!jenv->ExceptionOccurred()) - throwRuntimeEx(jenv, "Error getting handle to native resources"); - return 0; - } - - uint8_t* barrMapper = (uint8_t*)jenv->GetByteArrayElements(mapperBytes, JNI_NULL); - if (!barrMapper) { - jenv->ReleaseByteArrayElements(keyBeginBytes, (jbyte*)barrBegin, JNI_ABORT); - jenv->ReleaseByteArrayElements(keyEndBytes, (jbyte*)barrEnd, JNI_ABORT); - if (!jenv->ExceptionOccurred()) - throwRuntimeEx(jenv, "Error getting handle to native resources"); - return 0; - } - - FDBFuture* f = fdb_transaction_get_range_and_flat_map(tr, - barrBegin, - jenv->GetArrayLength(keyBeginBytes), - orEqualBegin, - offsetBegin, - barrEnd, - jenv->GetArrayLength(keyEndBytes), - orEqualEnd, - offsetEnd, - barrMapper, - jenv->GetArrayLength(mapperBytes), - rowLimit, - targetBytes, - (FDBStreamingMode)streamingMode, - iteration, - snapshot, - reverse); - jenv->ReleaseByteArrayElements(keyBeginBytes, (jbyte*)barrBegin, JNI_ABORT); - jenv->ReleaseByteArrayElements(keyEndBytes, (jbyte*)barrEnd, JNI_ABORT); - jenv->ReleaseByteArrayElements(mapperBytes, (jbyte*)barrMapper, JNI_ABORT); - return (jlong)f; -} - JNIEXPORT void JNICALL Java_com_apple_foundationdb_FutureResults_FutureResults_1getDirect(JNIEnv* jenv, jobject, jlong future, diff --git a/bindings/java/src/integration/com/apple/foundationdb/RangeAndFlatMapQueryIntegrationTest.java b/bindings/java/src/integration/com/apple/foundationdb/RangeAndFlatMapQueryIntegrationTest.java deleted file mode 100644 index 7f418b5b2d0..00000000000 --- a/bindings/java/src/integration/com/apple/foundationdb/RangeAndFlatMapQueryIntegrationTest.java +++ /dev/null @@ -1,253 +0,0 @@ -/* - * RangeAndFlatMapQueryIntegrationTest.java - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2018 Apple Inc. 
and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.apple.foundationdb; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.atomic.AtomicReference; - -import com.apple.foundationdb.async.AsyncIterable; -import com.apple.foundationdb.async.AsyncUtil; -import com.apple.foundationdb.tuple.ByteArrayUtil; -import com.apple.foundationdb.tuple.Tuple; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; - -@ExtendWith(RequiresDatabase.class) -class RangeAndFlatMapQueryIntegrationTest { - private static final FDB fdb = FDB.selectAPIVersion(710); - public String databaseArg = null; - private Database openFDB() { return fdb.open(databaseArg); } - - @BeforeEach - @AfterEach - void clearDatabase() throws Exception { - /* - * Empty the database before and after each run, just in case - */ - try (Database db = openFDB()) { - db.run(tr -> { - tr.clear(Range.startsWith(new byte[] { (byte)0x00 })); - return null; - }); - } - } - - static private final byte[] EMPTY = Tuple.from().pack(); - static private 
final String PREFIX = "prefix"; - static private final String RECORD = "RECORD"; - static private final String INDEX = "INDEX"; - static private String primaryKey(int i) { return String.format("primary-key-of-record-%08d", i); } - static private String indexKey(int i) { return String.format("index-key-of-record-%08d", i); } - static private String dataOfRecord(int i) { return String.format("data-of-record-%08d", i); } - - static byte[] HOP_INFO = Tuple.from(PREFIX, RECORD, "{K[3]}").pack(); - static private byte[] indexEntryKey(final int i) { - return Tuple.from(PREFIX, INDEX, indexKey(i), primaryKey(i)).pack(); - } - static private byte[] recordKey(final int i) { return Tuple.from(PREFIX, RECORD, primaryKey(i)).pack(); } - static private byte[] recordValue(final int i) { return Tuple.from(dataOfRecord(i)).pack(); } - - static private void insertRecordWithIndex(final Transaction tr, final int i) { - tr.set(indexEntryKey(i), EMPTY); - tr.set(recordKey(i), recordValue(i)); - } - - private static String getArgFromEnv() { - String[] clusterFiles = MultiClientHelper.readClusterFromEnv(); - String cluster = clusterFiles[0]; - System.out.printf("Using Cluster: %s\n", cluster); - return cluster; - } - public static void main(String[] args) throws Exception { - final RangeAndFlatMapQueryIntegrationTest test = new RangeAndFlatMapQueryIntegrationTest(); - test.databaseArg = getArgFromEnv(); - test.clearDatabase(); - test.comparePerformance(); - test.clearDatabase(); - } - - int numRecords = 10000; - int numQueries = 10000; - int numRecordsPerQuery = 100; - boolean validate = false; - @Test - void comparePerformance() { - FDB fdb = FDB.selectAPIVersion(710); - try (Database db = openFDB()) { - insertRecordsWithIndexes(numRecords, db); - instrument(rangeQueryAndGet, "rangeQueryAndGet", db); - instrument(rangeQueryAndFlatMap, "rangeQueryAndFlatMap", db); - } - } - - private void instrument(final RangeQueryWithIndex query, final String name, final Database db) { - 
System.out.printf("Starting %s (numQueries:%d, numRecordsPerQuery:%d)\n", name, numQueries, numRecordsPerQuery); - long startTime = System.currentTimeMillis(); - for (int queryId = 0; queryId < numQueries; queryId++) { - int begin = ThreadLocalRandom.current().nextInt(numRecords - numRecordsPerQuery); - query.run(begin, begin + numRecordsPerQuery, db); - } - long time = System.currentTimeMillis() - startTime; - System.out.printf("Finished %s, it takes %d ms for %d queries (%d qps)\n", name, time, numQueries, - numQueries * 1000L / time); - } - - static private final int RECORDS_PER_TXN = 100; - static private void insertRecordsWithIndexes(int n, Database db) { - int i = 0; - while (i < n) { - int begin = i; - int end = Math.min(n, i + RECORDS_PER_TXN); - // insert [begin, end) in one transaction - db.run(tr -> { - for (int t = begin; t < end; t++) { - insertRecordWithIndex(tr, t); - } - return null; - }); - i = end; - } - } - - public interface RangeQueryWithIndex { - void run(int begin, int end, Database db); - } - - RangeQueryWithIndex rangeQueryAndGet = (int begin, int end, Database db) -> db.run(tr -> { - try { - List kvs = tr.getRange(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)), - KeySelector.firstGreaterOrEqual(indexEntryKey(end)), - ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL) - .asList() - .get(); - Assertions.assertEquals(end - begin, kvs.size()); - - // Get the records of each index entry IN PARALLEL. - List> resultFutures = new ArrayList<>(); - // In reality, we need to get the record key by parsing the index entry key. But considering this is a - // performance test, we just ignore the returned key and simply generate it from recordKey. 
- for (int id = begin; id < end; id++) { - resultFutures.add(tr.get(recordKey(id))); - } - AsyncUtil.whenAll(resultFutures).get(); - - if (validate) { - final Iterator indexes = kvs.iterator(); - final Iterator> records = resultFutures.iterator(); - for (int id = begin; id < end; id++) { - Assertions.assertTrue(indexes.hasNext()); - assertByteArrayEquals(indexEntryKey(id), indexes.next().getKey()); - Assertions.assertTrue(records.hasNext()); - assertByteArrayEquals(recordValue(id), records.next().get()); - } - Assertions.assertFalse(indexes.hasNext()); - Assertions.assertFalse(records.hasNext()); - } - } catch (Exception e) { - Assertions.fail("Unexpected exception", e); - } - return null; - }); - - RangeQueryWithIndex rangeQueryAndFlatMap = (int begin, int end, Database db) -> db.run(tr -> { - try { - tr.options().setReadYourWritesDisable(); - List kvs = - tr.getRangeAndFlatMap(KeySelector.firstGreaterOrEqual(indexEntryKey(begin)), - KeySelector.firstGreaterOrEqual(indexEntryKey(end)), HOP_INFO, - ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL) - .asList() - .get(); - Assertions.assertEquals(end - begin, kvs.size()); - - if (validate) { - final Iterator results = kvs.iterator(); - for (int id = begin; id < end; id++) { - Assertions.assertTrue(results.hasNext()); - assertByteArrayEquals(recordValue(id), results.next().getValue()); - } - Assertions.assertFalse(results.hasNext()); - } - } catch (Exception e) { - Assertions.fail("Unexpected exception", e); - } - return null; - }); - - void assertByteArrayEquals(byte[] expected, byte[] actual) { - Assertions.assertEquals(ByteArrayUtil.printable(expected), ByteArrayUtil.printable(actual)); - } - - @Test - void rangeAndFlatMapQueryOverMultipleRows() throws Exception { - try (Database db = openFDB()) { - insertRecordsWithIndexes(3, db); - - List expected_data_of_records = new ArrayList<>(); - for (int i = 0; i <= 1; i++) { - expected_data_of_records.add(recordValue(i)); - } - - db.run(tr -> { - // 
getRangeAndFlatMap is only support without RYW. This is a must!!! - tr.options().setReadYourWritesDisable(); - - Iterator kvs = - tr.getRangeAndFlatMap(KeySelector.firstGreaterOrEqual(indexEntryKey(0)), - KeySelector.firstGreaterThan(indexEntryKey(1)), HOP_INFO, - ReadTransaction.ROW_LIMIT_UNLIMITED, false, StreamingMode.WANT_ALL) - .iterator(); - Iterator expected_data_of_records_iter = expected_data_of_records.iterator(); - while (expected_data_of_records_iter.hasNext()) { - Assertions.assertTrue(kvs.hasNext(), "iterator ended too early"); - KeyValue kv = kvs.next(); - byte[] actual_data_of_record = kv.getValue(); - byte[] expected_data_of_record = expected_data_of_records_iter.next(); - - // System.out.println("result key:" + ByteArrayUtil.printable(kv.getKey()) + " value:" + - // ByteArrayUtil.printable(kv.getValue())); Output: - // result - // key:\x02prefix\x00\x02INDEX\x00\x02index-key-of-record-0\x00\x02primary-key-of-record-0\x00 - // value:\x02data-of-record-0\x00 - // result - // key:\x02prefix\x00\x02INDEX\x00\x02index-key-of-record-1\x00\x02primary-key-of-record-1\x00 - // value:\x02data-of-record-1\x00 - - // For now, we don't guarantee what that the returned keys mean. 
- Assertions.assertArrayEquals(expected_data_of_record, actual_data_of_record, - "Incorrect data of record!"); - } - Assertions.assertFalse(kvs.hasNext(), "Iterator returned too much data"); - - return null; - }); - } - } -} diff --git a/bindings/java/src/junit/com/apple/foundationdb/FakeFDBTransaction.java b/bindings/java/src/junit/com/apple/foundationdb/FakeFDBTransaction.java index 0c5a121c64a..f154790b2b5 100644 --- a/bindings/java/src/junit/com/apple/foundationdb/FakeFDBTransaction.java +++ b/bindings/java/src/junit/com/apple/foundationdb/FakeFDBTransaction.java @@ -88,11 +88,8 @@ public CompletableFuture get(byte[] key) { public int getNumRangeCalls() { return numRangeCalls; } @Override - protected FutureResults getRange_internal(KeySelector begin, KeySelector end, - // TODO: map is not supported in FakeFDBTransaction yet. - byte[] mapper, // Nullable - int rowLimit, int targetBytes, int streamingMode, int iteration, - boolean isSnapshot, boolean reverse) { + protected FutureResults getRange_internal(KeySelector begin, KeySelector end, int rowLimit, int targetBytes, + int streamingMode, int iteration, boolean isSnapshot, boolean reverse) { numRangeCalls++; // TODO this is probably not correct for all KeySelector instances--we'll want to match with real behavior NavigableMap range = diff --git a/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java b/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java index 9bd99c892d6..05431a0fba1 100644 --- a/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java +++ b/bindings/java/src/main/com/apple/foundationdb/FDBTransaction.java @@ -91,15 +91,6 @@ public CompletableFuture getRangeSplitPoints(Range range, long c return FDBTransaction.this.getRangeSplitPoints(range, chunkSize); } - @Override - public AsyncIterable getRangeAndFlatMap(KeySelector begin, KeySelector end, byte[] mapper, int limit, - boolean reverse, StreamingMode mode) { - if (mapper == null) { - throw new 
IllegalArgumentException("Mapper must be non-null"); - } - return new RangeQuery(FDBTransaction.this, true, begin, end, mapper, limit, reverse, mode, eventKeeper); - } - /////////////////// // getRange -> KeySelectors /////////////////// @@ -347,15 +338,6 @@ public CompletableFuture getRangeSplitPoints(Range range, long c return this.getRangeSplitPoints(range.begin, range.end, chunkSize); } - @Override - public AsyncIterable getRangeAndFlatMap(KeySelector begin, KeySelector end, byte[] mapper, int limit, - boolean reverse, StreamingMode mode) { - if (mapper == null) { - throw new IllegalArgumentException("Mapper must be non-null"); - } - return new RangeQuery(this, false, begin, end, mapper, limit, reverse, mode, eventKeeper); - } - /////////////////// // getRange -> KeySelectors /////////////////// @@ -433,10 +415,10 @@ public Database getDatabase() { } // Users of this function must close the returned FutureResults when finished - protected FutureResults getRange_internal(KeySelector begin, KeySelector end, - byte[] mapper, // Nullable - int rowLimit, int targetBytes, int streamingMode, int iteration, - boolean isSnapshot, boolean reverse) { + protected FutureResults getRange_internal( + KeySelector begin, KeySelector end, + int rowLimit, int targetBytes, int streamingMode, + int iteration, boolean isSnapshot, boolean reverse) { if (eventKeeper != null) { eventKeeper.increment(Events.JNI_CALL); } @@ -447,14 +429,10 @@ protected FutureResults getRange_internal(KeySelector begin, KeySelector end, begin.toString(), end.toString(), rowLimit, targetBytes, streamingMode, iteration, Boolean.toString(isSnapshot), Boolean.toString(reverse)));*/ return new FutureResults( - mapper == null - ? 
Transaction_getRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(), end.getKey(), - end.orEqual(), end.getOffset(), rowLimit, targetBytes, streamingMode, - iteration, isSnapshot, reverse) - : Transaction_getRangeAndFlatMap(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(), - end.getKey(), end.orEqual(), end.getOffset(), mapper, rowLimit, - targetBytes, streamingMode, iteration, isSnapshot, reverse), - FDB.instance().isDirectBufferQueriesEnabled(), executor, eventKeeper); + Transaction_getRange(getPtr(), begin.getKey(), begin.orEqual(), begin.getOffset(), + end.getKey(), end.orEqual(), end.getOffset(), rowLimit, targetBytes, + streamingMode, iteration, isSnapshot, reverse), + FDB.instance().isDirectBufferQueriesEnabled(), executor, eventKeeper); } finally { pointerReadLock.unlock(); } @@ -793,12 +771,6 @@ private native long Transaction_getRange(long cPtr, byte[] keyEnd, boolean orEqualEnd, int offsetEnd, int rowLimit, int targetBytes, int streamingMode, int iteration, boolean isSnapshot, boolean reverse); - private native long Transaction_getRangeAndFlatMap(long cPtr, byte[] keyBegin, boolean orEqualBegin, - int offsetBegin, byte[] keyEnd, boolean orEqualEnd, - int offsetEnd, - byte[] mapper, // Nonnull - int rowLimit, int targetBytes, int streamingMode, int iteration, - boolean isSnapshot, boolean reverse); private native void Transaction_addConflictRange(long cPtr, byte[] keyBegin, byte[] keyEnd, int conflictRangeType); private native void Transaction_set(long cPtr, byte[] key, byte[] value); diff --git a/bindings/java/src/main/com/apple/foundationdb/RangeQuery.java b/bindings/java/src/main/com/apple/foundationdb/RangeQuery.java index f91b00471ad..d518a0b9db5 100644 --- a/bindings/java/src/main/com/apple/foundationdb/RangeQuery.java +++ b/bindings/java/src/main/com/apple/foundationdb/RangeQuery.java @@ -49,19 +49,17 @@ class RangeQuery implements AsyncIterable { private final FDBTransaction tr; private final KeySelector begin; private 
final KeySelector end; - private final byte[] mapper; // Nullable private final boolean snapshot; private final int rowLimit; private final boolean reverse; private final StreamingMode streamingMode; private final EventKeeper eventKeeper; - RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, byte[] mapper, - int rowLimit, boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) { + RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, int rowLimit, + boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) { this.tr = transaction; this.begin = begin; this.end = end; - this.mapper = mapper; this.snapshot = isSnapshot; this.rowLimit = rowLimit; this.reverse = reverse; @@ -69,12 +67,6 @@ class RangeQuery implements AsyncIterable { this.eventKeeper = eventKeeper; } - // RangeQueryAndFlatMap - RangeQuery(FDBTransaction transaction, boolean isSnapshot, KeySelector begin, KeySelector end, int rowLimit, - boolean reverse, StreamingMode streamingMode, EventKeeper eventKeeper) { - this(transaction, isSnapshot, begin, end, null, rowLimit, reverse, streamingMode, eventKeeper); - } - /** * Returns all the results from the range requested as a {@code List}. 
If there were no * limits on the original query and there is a large amount of data in the database @@ -91,16 +83,16 @@ public CompletableFuture> asList() { // if the streaming mode is EXACT, try and grab things as one chunk if(mode == StreamingMode.EXACT) { - - FutureResults range = tr.getRange_internal(this.begin, this.end, this.mapper, this.rowLimit, 0, - StreamingMode.EXACT.code(), 1, this.snapshot, this.reverse); + FutureResults range = tr.getRange_internal( + this.begin, this.end, this.rowLimit, 0, StreamingMode.EXACT.code(), + 1, this.snapshot, this.reverse); return range.thenApply(result -> result.get().values) .whenComplete((result, e) -> range.close()); } // If the streaming mode is not EXACT, simply collect the results of an // iteration into a list - return AsyncUtil.collect(new RangeQuery(tr, snapshot, begin, end, mapper, rowLimit, reverse, mode, eventKeeper), + return AsyncUtil.collect(new RangeQuery(tr, snapshot, begin, end, rowLimit, reverse, mode, eventKeeper), tr.getExecutor()); } @@ -229,8 +221,8 @@ private synchronized void startNextFetch() { nextFuture = new CompletableFuture<>(); final long sTime = System.nanoTime(); - fetchingChunk = tr.getRange_internal(begin, end, mapper, rowsLimited ? rowsRemaining : 0, 0, - streamingMode.code(), ++iteration, snapshot, reverse); + fetchingChunk = tr.getRange_internal(begin, end, rowsLimited ? 
rowsRemaining : 0, 0, streamingMode.code(), + ++iteration, snapshot, reverse); BiConsumer cons = new FetchComplete(fetchingChunk,nextFuture); if(eventKeeper!=null){ diff --git a/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java b/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java index 699dfd3ec09..1dabc08c930 100644 --- a/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java +++ b/bindings/java/src/main/com/apple/foundationdb/ReadTransaction.java @@ -424,42 +424,6 @@ AsyncIterable getRange(Range range, AsyncIterable getRange(Range range, int limit, boolean reverse, StreamingMode mode); - /** - * Gets an ordered range of keys and values from the database. The begin - * and end keys are specified by {@code KeySelector}s, with the begin - * {@code KeySelector} inclusive and the end {@code KeySelector} exclusive. - * - * @see KeySelector - * @see AsyncIterator - * - * @param begin the beginning of the range (inclusive) - * @param end the end of the range (exclusive) - * @param mapper TODO - * @param limit the maximum number of results to return. Limits results to the - * first keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query - * should not limit the number of results. If {@code reverse} is {@code true} rows - * will be limited starting at the end of the range. - * @param reverse return results starting at the end of the range in reverse order. - * Reading ranges in reverse is supported natively by the database and should - * have minimal extra cost. - * @param mode provide a hint about how the results are to be used. This - * can provide speed improvements or efficiency gains based on the caller's - * knowledge of the upcoming access pattern. - * - *

- * When converting the result of this query to a list using {@link AsyncIterable#asList()} with the {@code - * ITERATOR} streaming mode, the query is automatically modified to fetch results in larger batches. This is done - * because it is known in advance that the {@link AsyncIterable#asList()} function will fetch all results in the - * range. If a limit is specified, the {@code EXACT} streaming mode will be used, and otherwise it will use {@code - * WANT_ALL}. - * - * To achieve comparable performance when iterating over an entire range without using {@link - * AsyncIterable#asList()}, the same streaming mode would need to be used. - *

- * @return a handle to access the results of the asynchronous call - */ - AsyncIterable getRangeAndFlatMap(KeySelector begin, KeySelector end, byte[] mapper, int limit, - boolean reverse, StreamingMode mode); /** * Gets an estimate for the number of bytes stored in the given range. diff --git a/bindings/java/src/tests.cmake b/bindings/java/src/tests.cmake index b84c148ac25..3e9dce6657f 100644 --- a/bindings/java/src/tests.cmake +++ b/bindings/java/src/tests.cmake @@ -52,7 +52,6 @@ set(JAVA_INTEGRATION_TESTS src/integration/com/apple/foundationdb/CycleMultiClientIntegrationTest.java src/integration/com/apple/foundationdb/SidebandMultiThreadClientTest.java src/integration/com/apple/foundationdb/RepeatableReadMultiThreadClientTest.java - src/integration/com/apple/foundationdb/RangeAndFlatMapQueryIntegrationTest.java ) # Resources that are used in integration testing, but are not explicitly test files (JUnit rules, diff --git a/documentation/sphinx/source/release-notes/release-notes-700.rst b/documentation/sphinx/source/release-notes/release-notes-700.rst index a1abd083665..770c4c9af5e 100644 --- a/documentation/sphinx/source/release-notes/release-notes-700.rst +++ b/documentation/sphinx/source/release-notes/release-notes-700.rst @@ -30,7 +30,6 @@ Features * Improved the efficiency with which storage servers replicate data between themselves. `(PR #5017) `_ * Added support to ``exclude command`` to exclude based on locality match. `(PR #5113) `_ * Add the ``trace_partial_file_suffix`` network option. This option will give unfinished trace files a special suffix to indicate they're not complete yet. When the trace file is complete, it is renamed to remove the suffix. `(PR #5328) `_ -* Added "get range and flat map" feature with new APIs (see Bindings section). Storage servers are able to generate the keys in the queries based on another query. With this, upper layer can push some computations down to FDB, to improve latency and bandwidth when read. 
`(PR #5609) `_ Performance ----------- @@ -87,8 +86,6 @@ Bindings * C: Added a function, ``fdb_database_create_snapshot``, to create a snapshot of the database. `(PR #4241) `_ * C: Added ``fdb_database_get_main_thread_busyness`` function to report how busy a client's main thread is. `(PR #4504) `_ * Java: Added ``Database.getMainThreadBusyness`` function to report how busy a client's main thread is. `(PR #4564) `_ -* C: Added ``fdb_transaction_get_range_and_flat_map`` function to support running queries based on another query in one request. `(PR #5609) `_ -* Java: Added ``Transaction.getRangeAndFlatMap`` function to support running queries based on another query in one request. `(PR #5609) `_ Other Changes ------------- diff --git a/fdbclient/DatabaseContext.h b/fdbclient/DatabaseContext.h index 4c038a6fc0b..837d4ec7933 100644 --- a/fdbclient/DatabaseContext.h +++ b/fdbclient/DatabaseContext.h @@ -369,7 +369,6 @@ class DatabaseContext : public ReferenceCounted, public FastAll Counter transactionGetKeyRequests; Counter transactionGetValueRequests; Counter transactionGetRangeRequests; - Counter transactionGetRangeAndFlatMapRequests; Counter transactionGetRangeStreamRequests; Counter transactionWatchRequests; Counter transactionGetAddressesForKeyRequests; diff --git a/fdbclient/IClientApi.h b/fdbclient/IClientApi.h index 5562ad3ba6e..cf304202bb6 100644 --- a/fdbclient/IClientApi.h +++ b/fdbclient/IClientApi.h @@ -59,12 +59,6 @@ class ITransaction { GetRangeLimits limits, bool snapshot = false, bool reverse = false) = 0; - virtual ThreadFuture getRangeAndFlatMap(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& mapper, - GetRangeLimits limits, - bool snapshot = false, - bool reverse = false) = 0; virtual ThreadFuture>> getAddressesForKey(const KeyRef& key) = 0; virtual ThreadFuture> getVersionstamp() = 0; diff --git a/fdbclient/ISingleThreadTransaction.h b/fdbclient/ISingleThreadTransaction.h index 62336a15d72..9228184593f 100644 --- 
a/fdbclient/ISingleThreadTransaction.h +++ b/fdbclient/ISingleThreadTransaction.h @@ -63,12 +63,6 @@ class ISingleThreadTransaction : public ReferenceCounted getRangeAndFlatMap(KeySelector begin, - KeySelector end, - Key mapper, - GetRangeLimits limits, - Snapshot = Snapshot::False, - Reverse = Reverse::False) = 0; virtual Future>> getAddressesForKey(Key const& key) = 0; virtual Future>> getRangeSplitPoints(KeyRange const& range, int64_t chunkSize) = 0; virtual Future getEstimatedRangeSizeBytes(KeyRange const& keys) = 0; diff --git a/fdbclient/MultiVersionTransaction.actor.cpp b/fdbclient/MultiVersionTransaction.actor.cpp index 32603d1f53d..9d701439d92 100644 --- a/fdbclient/MultiVersionTransaction.actor.cpp +++ b/fdbclient/MultiVersionTransaction.actor.cpp @@ -141,41 +141,6 @@ ThreadFuture DLTransaction::getRange(const KeyRangeRef& keys, return getRange(firstGreaterOrEqual(keys.begin), firstGreaterOrEqual(keys.end), limits, snapshot, reverse); } -ThreadFuture DLTransaction::getRangeAndFlatMap(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& mapper, - GetRangeLimits limits, - bool snapshot, - bool reverse) { - FdbCApi::FDBFuture* f = api->transactionGetRangeAndFlatMap(tr, - begin.getKey().begin(), - begin.getKey().size(), - begin.orEqual, - begin.offset, - end.getKey().begin(), - end.getKey().size(), - end.orEqual, - end.offset, - mapper.begin(), - mapper.size(), - limits.rows, - limits.bytes, - FDB_STREAMING_MODE_EXACT, - 0, - snapshot, - reverse); - return toThreadFuture(api, f, [](FdbCApi::FDBFuture* f, FdbCApi* api) { - const FdbCApi::FDBKeyValue* kvs; - int count; - FdbCApi::fdb_bool_t more; - FdbCApi::fdb_error_t error = api->futureGetKeyValueArray(f, &kvs, &count, &more); - ASSERT(!error); - - // The memory for this is stored in the FDBFuture and is released when the future gets destroyed - return RangeResult(RangeResultRef(VectorRef((KeyValueRef*)kvs, count), more), Arena()); - }); -} - ThreadFuture>> 
DLTransaction::getAddressesForKey(const KeyRef& key) { FdbCApi::FDBFuture* f = api->transactionGetAddressesForKey(tr, key.begin(), key.size()); @@ -487,7 +452,6 @@ void DLApi::init() { loadClientFunction(&api->transactionGetKey, lib, fdbCPath, "fdb_transaction_get_key"); loadClientFunction(&api->transactionGetAddressesForKey, lib, fdbCPath, "fdb_transaction_get_addresses_for_key"); loadClientFunction(&api->transactionGetRange, lib, fdbCPath, "fdb_transaction_get_range"); - loadClientFunction(&api->transactionGetRangeAndFlatMap, lib, fdbCPath, "fdb_transaction_get_range_and_flat_map"); loadClientFunction( &api->transactionGetVersionstamp, lib, fdbCPath, "fdb_transaction_get_versionstamp", headerVersion >= 410); loadClientFunction(&api->transactionSet, lib, fdbCPath, "fdb_transaction_set"); @@ -767,18 +731,6 @@ ThreadFuture MultiVersionTransaction::getRange(const KeyRangeRef& k return abortableFuture(f, tr.onChange); } -ThreadFuture MultiVersionTransaction::getRangeAndFlatMap(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& mapper, - GetRangeLimits limits, - bool snapshot, - bool reverse) { - auto tr = getTransaction(); - auto f = tr.transaction ? tr.transaction->getRangeAndFlatMap(begin, end, mapper, limits, snapshot, reverse) - : makeTimeout(); - return abortableFuture(f, tr.onChange); -} - ThreadFuture> MultiVersionTransaction::getVersionstamp() { auto tr = getTransaction(); auto f = tr.transaction ? 
tr.transaction->getVersionstamp() : makeTimeout>(); diff --git a/fdbclient/MultiVersionTransaction.h b/fdbclient/MultiVersionTransaction.h index 50e21bca576..95d9a8b14cc 100644 --- a/fdbclient/MultiVersionTransaction.h +++ b/fdbclient/MultiVersionTransaction.h @@ -118,23 +118,6 @@ struct FdbCApi : public ThreadSafeReferenceCounted { int iteration, fdb_bool_t snapshot, fdb_bool_t reverse); - FDBFuture* (*transactionGetRangeAndFlatMap)(FDBTransaction* tr, - uint8_t const* beginKeyName, - int beginKeyNameLength, - fdb_bool_t beginOrEqual, - int beginOffset, - uint8_t const* endKeyName, - int endKeyNameLength, - fdb_bool_t endOrEqual, - int endOffset, - uint8_t const* mapper_name, - int mapper_name_length, - int limit, - int targetBytes, - FDBStreamingMode mode, - int iteration, - fdb_bool_t snapshot, - fdb_bool_t reverse); FDBFuture* (*transactionGetVersionstamp)(FDBTransaction* tr); void (*transactionSet)(FDBTransaction* tr, @@ -236,12 +219,6 @@ class DLTransaction : public ITransaction, ThreadSafeReferenceCounted getRangeAndFlatMap(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& mapper, - GetRangeLimits limits, - bool snapshot, - bool reverse) override; ThreadFuture>> getAddressesForKey(const KeyRef& key) override; ThreadFuture> getVersionstamp() override; ThreadFuture getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override; @@ -383,12 +360,6 @@ class MultiVersionTransaction : public ITransaction, ThreadSafeReferenceCounted< GetRangeLimits limits, bool snapshot = false, bool reverse = false) override; - ThreadFuture getRangeAndFlatMap(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& mapper, - GetRangeLimits limits, - bool snapshot, - bool reverse) override; ThreadFuture>> getAddressesForKey(const KeyRef& key) override; ThreadFuture> getVersionstamp() override; diff --git a/fdbclient/NativeAPI.actor.cpp b/fdbclient/NativeAPI.actor.cpp index 91c80a5ad9d..117524a43a3 100644 --- 
a/fdbclient/NativeAPI.actor.cpp +++ b/fdbclient/NativeAPI.actor.cpp @@ -160,8 +160,6 @@ void DatabaseContext::addTssMapping(StorageServerInterface const& ssi, StorageSe TSSEndpointData(tssi.id(), tssi.getKey.getEndpoint(), metrics)); queueModel.updateTssEndpoint(ssi.getKeyValues.getEndpoint().token.first(), TSSEndpointData(tssi.id(), tssi.getKeyValues.getEndpoint(), metrics)); - queueModel.updateTssEndpoint(ssi.getKeyValuesAndFlatMap.getEndpoint().token.first(), - TSSEndpointData(tssi.id(), tssi.getKeyValuesAndFlatMap.getEndpoint(), metrics)); queueModel.updateTssEndpoint(ssi.getKeyValuesStream.getEndpoint().token.first(), TSSEndpointData(tssi.id(), tssi.getKeyValuesStream.getEndpoint(), metrics)); @@ -185,7 +183,6 @@ void DatabaseContext::removeTssMapping(StorageServerInterface const& ssi) { queueModel.removeTssEndpoint(ssi.getValue.getEndpoint().token.first()); queueModel.removeTssEndpoint(ssi.getKey.getEndpoint().token.first()); queueModel.removeTssEndpoint(ssi.getKeyValues.getEndpoint().token.first()); - queueModel.removeTssEndpoint(ssi.getKeyValuesAndFlatMap.getEndpoint().token.first()); queueModel.removeTssEndpoint(ssi.getKeyValuesStream.getEndpoint().token.first()); queueModel.removeTssEndpoint(ssi.watchValue.getEndpoint().token.first()); @@ -1199,7 +1196,6 @@ DatabaseContext::DatabaseContext(Reference watchValueMap(Future version, return Void(); } -template -void transformRangeLimits(GetRangeLimits limits, Reverse reverse, GetKeyValuesFamilyRequest& req) { +void transformRangeLimits(GetRangeLimits limits, Reverse reverse, GetKeyValuesRequest& req) { if (limits.bytes != 0) { if (!limits.hasRowLimit()) req.limit = CLIENT_KNOBS->REPLY_BYTE_LIMIT; // Can't get more than this many rows anyway @@ -3055,47 +3049,26 @@ void transformRangeLimits(GetRangeLimits limits, Reverse reverse, GetKeyValuesFa } } -template -RequestStream StorageServerInterface::*getRangeRequestStream() { - if constexpr (std::is_same::value) { - return &StorageServerInterface::getKeyValues; - 
} else if (std::is_same::value) { - return &StorageServerInterface::getKeyValuesAndFlatMap; - } else { - UNREACHABLE(); - } -} - -ACTOR template -Future getExactRange(Database cx, - Version version, - KeyRange keys, - Key mapper, - GetRangeLimits limits, - Reverse reverse, - TransactionInfo info, - TagSet tags) { +ACTOR Future getExactRange(Database cx, + Version version, + KeyRange keys, + GetRangeLimits limits, + Reverse reverse, + TransactionInfo info, + TagSet tags) { state RangeResult output; state Span span("NAPI:getExactRange"_loc, info.spanID); // printf("getExactRange( '%s', '%s' )\n", keys.begin.toString().c_str(), keys.end.toString().c_str()); loop { - state std::vector>> locations = - wait(getKeyRangeLocations(cx, - keys, - CLIENT_KNOBS->GET_RANGE_SHARD_LIMIT, - reverse, - getRangeRequestStream(), - info)); + state std::vector>> locations = wait(getKeyRangeLocations( + cx, keys, CLIENT_KNOBS->GET_RANGE_SHARD_LIMIT, reverse, &StorageServerInterface::getKeyValues, info)); ASSERT(locations.size()); state int shard = 0; loop { const KeyRangeRef& range = locations[shard].first; - GetKeyValuesFamilyRequest req; - req.mapper = mapper; - req.arena.dependsOn(mapper.arena()); - + GetKeyValuesRequest req; req.version = version; req.begin = firstGreaterOrEqual(range.begin); req.end = firstGreaterOrEqual(range.end); @@ -3125,14 +3098,14 @@ Future getExactRange(Database cx, .detail("Servers", locations[shard].second->description());*/ } ++cx->transactionPhysicalReads; - state GetKeyValuesFamilyReply rep; + state GetKeyValuesReply rep; try { choose { when(wait(cx->connectionFileChanged())) { throw transaction_too_old(); } - when(GetKeyValuesFamilyReply _rep = + when(GetKeyValuesReply _rep = wait(loadBalance(cx.getPtr(), locations[shard].second, - getRangeRequestStream(), + &StorageServerInterface::getKeyValues, req, TaskPriority::DefaultPromiseEndpoint, AtMostOnce::False, @@ -3182,7 +3155,7 @@ Future getExactRange(Database cx, .detail("BlockBytes", 
rep.data.expectedSize()); ASSERT(false); } - TEST(true); // GetKeyValuesFamilyReply.more in getExactRange + TEST(true); // GetKeyValuesReply.more in getExactRange // Make next request to the same shard with a beginning key just after the last key returned if (reverse) locations[shard].first = @@ -3258,16 +3231,14 @@ Future resolveKey(Database const& cx, return getKey(cx, key, version, info, tags); } -ACTOR template -Future getRangeFallback(Database cx, - Version version, - KeySelector begin, - KeySelector end, - Key mapper, - GetRangeLimits limits, - Reverse reverse, - TransactionInfo info, - TagSet tags) { +ACTOR Future getRangeFallback(Database cx, + Version version, + KeySelector begin, + KeySelector end, + GetRangeLimits limits, + Reverse reverse, + TransactionInfo info, + TagSet tags) { if (version == latestVersion) { state Transaction transaction(cx); transaction.setOption(FDBTransactionOptions::CAUSAL_READ_RISKY); @@ -3290,8 +3261,7 @@ Future getRangeFallback(Database cx, // if b is allKeys.begin, we have either read through the beginning of the database, // or allKeys.begin exists in the database and will be part of the conflict range anyways - RangeResult _r = wait(getExactRange( - cx, version, KeyRangeRef(b, e), mapper, limits, reverse, info, tags)); + RangeResult _r = wait(getExactRange(cx, version, KeyRangeRef(b, e), limits, reverse, info, tags)); RangeResult r = _r; if (b == allKeys.begin && ((reverse && !r.more) || !reverse)) @@ -3316,7 +3286,6 @@ Future getRangeFallback(Database cx, return r; } -// TODO: Client should add mapped keys to conflict ranges. 
void getRangeFinished(Database cx, Reference trLogInfo, double startTime, @@ -3371,23 +3340,17 @@ void getRangeFinished(Database cx, } } -// GetKeyValuesFamilyRequest: GetKeyValuesRequest or GetKeyValuesAndFlatMapRequest -// GetKeyValuesFamilyReply: GetKeyValuesReply or GetKeyValuesAndFlatMapReply -// Sadly we need GetKeyValuesFamilyReply because cannot do something like: state -// REPLY_TYPE(GetKeyValuesFamilyRequest) rep; -ACTOR template -Future getRange(Database cx, - Reference trLogInfo, - Future fVersion, - KeySelector begin, - KeySelector end, - Key mapper, - GetRangeLimits limits, - Promise> conflictRange, - Snapshot snapshot, - Reverse reverse, - TransactionInfo info, - TagSet tags) { +ACTOR Future getRange(Database cx, + Reference trLogInfo, + Future fVersion, + KeySelector begin, + KeySelector end, + GetRangeLimits limits, + Promise> conflictRange, + Snapshot snapshot, + Reverse reverse, + TransactionInfo info, + TagSet tags) { state GetRangeLimits originalLimits(limits); state KeySelector originalBegin = begin; state KeySelector originalEnd = end; @@ -3421,13 +3384,11 @@ Future getRange(Database cx, Key locationKey = reverse ? Key(end.getKey(), end.arena()) : Key(begin.getKey(), begin.arena()); Reverse locationBackward{ reverse ? 
(end - 1).isBackward() : begin.isBackward() }; - state std::pair> beginServer = wait(getKeyLocation( - cx, locationKey, getRangeRequestStream(), info, locationBackward)); + state std::pair> beginServer = + wait(getKeyLocation(cx, locationKey, &StorageServerInterface::getKeyValues, info, locationBackward)); state KeyRange shard = beginServer.first; state bool modifiedSelectors = false; - state GetKeyValuesFamilyRequest req; - req.mapper = mapper; - req.arena.dependsOn(mapper.arena()); + state GetKeyValuesRequest req; req.isFetchKeys = (info.taskID == TaskPriority::FetchKeys); req.version = readVersion; @@ -3486,17 +3447,17 @@ Future getRange(Database cx, } ++cx->transactionPhysicalReads; - state GetKeyValuesFamilyReply rep; + state GetKeyValuesReply rep; try { if (CLIENT_BUGGIFY_WITH_PROB(.01)) { throw deterministicRandom()->randomChoice( std::vector{ transaction_too_old(), future_version() }); } // state AnnotateActor annotation(currentLineage); - GetKeyValuesFamilyReply _rep = + GetKeyValuesReply _rep = wait(loadBalance(cx.getPtr(), beginServer.second, - getRangeRequestStream(), + &StorageServerInterface::getKeyValues, req, TaskPriority::DefaultPromiseEndpoint, AtMostOnce::False, @@ -3596,11 +3557,11 @@ Future getRange(Database cx, if (!rep.more) { ASSERT(modifiedSelectors); - TEST(true); // !GetKeyValuesFamilyReply.more and modifiedSelectors in getRange + TEST(true); // !GetKeyValuesReply.more and modifiedSelectors in getRange if (!rep.data.size()) { - RangeResult result = wait(getRangeFallback( - cx, version, originalBegin, originalEnd, mapper, originalLimits, reverse, info, tags)); + RangeResult result = wait(getRangeFallback( + cx, version, originalBegin, originalEnd, originalLimits, reverse, info, tags)); getRangeFinished(cx, trLogInfo, startTime, @@ -3618,7 +3579,7 @@ Future getRange(Database cx, else begin = firstGreaterOrEqual(shard.end); } else { - TEST(true); // GetKeyValuesFamilyReply.more in getRange + TEST(true); // GetKeyValuesReply.more in getRange 
if (reverse) end = firstGreaterOrEqual(output[output.size() - 1].key); else @@ -3636,8 +3597,8 @@ Future getRange(Database cx, Reverse{ reverse ? (end - 1).isBackward() : begin.isBackward() }); if (e.code() == error_code_wrong_shard_server) { - RangeResult result = wait(getRangeFallback( - cx, version, originalBegin, originalEnd, mapper, originalLimits, reverse, info, tags)); + RangeResult result = wait(getRangeFallback( + cx, version, originalBegin, originalEnd, originalLimits, reverse, info, tags)); getRangeFinished(cx, trLogInfo, startTime, @@ -4203,18 +4164,17 @@ Future getRange(Database const& cx, Reverse const& reverse, TransactionInfo const& info, TagSet const& tags) { - return getRange(cx, - Reference(), - fVersion, - begin, - end, - ""_sr, - limits, - Promise>(), - Snapshot::True, - reverse, - info, - tags); + return getRange(cx, + Reference(), + fVersion, + begin, + end, + limits, + Promise>(), + Snapshot::True, + reverse, + info, + tags); } bool DatabaseContext::debugUseTags = false; @@ -4509,26 +4469,13 @@ Future Transaction::getKey(const KeySelector& key, Snapshot snapshot) { return getKeyAndConflictRange(cx, key, getReadVersion(), conflictRange, info, options.readTags); } -template -void increaseCounterForRequest(Database cx) { - if constexpr (std::is_same::value) { - ++cx->transactionGetRangeRequests; - } else if (std::is_same::value) { - ++cx->transactionGetRangeAndFlatMapRequests; - } else { - UNREACHABLE(); - } -} - -template -Future Transaction::getRangeInternal(const KeySelector& begin, - const KeySelector& end, - const Key& mapper, - GetRangeLimits limits, - Snapshot snapshot, - Reverse reverse) { +Future Transaction::getRange(const KeySelector& begin, + const KeySelector& end, + GetRangeLimits limits, + Snapshot snapshot, + Reverse reverse) { ++cx->transactionLogicalReads; - increaseCounterForRequest(cx); + ++cx->transactionGetRangeRequests; if (limits.isReached()) return RangeResult(); @@ -4560,37 +4507,8 @@ Future 
Transaction::getRangeInternal(const KeySelector& begin, extraConflictRanges.push_back(conflictRange.getFuture()); } - return ::getRange(cx, - trLogInfo, - getReadVersion(), - b, - e, - mapper, - limits, - conflictRange, - snapshot, - reverse, - info, - options.readTags); -} - -Future Transaction::getRange(const KeySelector& begin, - const KeySelector& end, - GetRangeLimits limits, - Snapshot snapshot, - Reverse reverse) { - return getRangeInternal(begin, end, ""_sr, limits, snapshot, reverse); -} - -Future Transaction::getRangeAndFlatMap(const KeySelector& begin, - const KeySelector& end, - const Key& mapper, - GetRangeLimits limits, - Snapshot snapshot, - Reverse reverse) { - - return getRangeInternal( - begin, end, mapper, limits, snapshot, reverse); + return ::getRange( + cx, trLogInfo, getReadVersion(), b, e, limits, conflictRange, snapshot, reverse, info, options.readTags); } Future Transaction::getRange(const KeySelector& begin, diff --git a/fdbclient/NativeAPI.actor.h b/fdbclient/NativeAPI.actor.h index af5b2b74199..6bd5ab892e0 100644 --- a/fdbclient/NativeAPI.actor.h +++ b/fdbclient/NativeAPI.actor.h @@ -289,23 +289,6 @@ class Transaction : NonCopyable { reverse); } - [[nodiscard]] Future getRangeAndFlatMap(const KeySelector& begin, - const KeySelector& end, - const Key& mapper, - GetRangeLimits limits, - Snapshot = Snapshot::False, - Reverse = Reverse::False); - -private: - template - Future getRangeInternal(const KeySelector& begin, - const KeySelector& end, - const Key& mapper, - GetRangeLimits limits, - Snapshot snapshot, - Reverse reverse); - -public: // A method for streaming data from the storage server that is more efficient than getRange when reading large // amounts of data [[nodiscard]] Future getRangeStream(const PromiseStream>& results, diff --git a/fdbclient/PaxosConfigTransaction.h b/fdbclient/PaxosConfigTransaction.h index 3854d4be963..4dfceb7a28e 100644 --- a/fdbclient/PaxosConfigTransaction.h +++ b/fdbclient/PaxosConfigTransaction.h @@ 
-50,14 +50,6 @@ class PaxosConfigTransaction final : public IConfigTransaction, public FastAlloc GetRangeLimits limits, Snapshot = Snapshot::False, Reverse = Reverse::False) override; - Future getRangeAndFlatMap(KeySelector begin, - KeySelector end, - Key mapper, - GetRangeLimits limits, - Snapshot = Snapshot::False, - Reverse = Reverse::False) override { - throw client_invalid_operation(); - } void set(KeyRef const& key, ValueRef const& value) override; void clear(KeyRangeRef const&) override { throw client_invalid_operation(); } void clear(KeyRef const&) override; diff --git a/fdbclient/ReadYourWrites.actor.cpp b/fdbclient/ReadYourWrites.actor.cpp index 56ce22fd07b..f156a36c85d 100644 --- a/fdbclient/ReadYourWrites.actor.cpp +++ b/fdbclient/ReadYourWrites.actor.cpp @@ -74,16 +74,6 @@ class RYWImpl { using Result = RangeResult; }; - template - struct GetRangeAndFlatMapReq { - GetRangeAndFlatMapReq(KeySelector begin, KeySelector end, Key mapper, GetRangeLimits limits) - : begin(begin), end(end), mapper(mapper), limits(limits) {} - KeySelector begin, end; - Key mapper; - GetRangeLimits limits; - using Result = RangeResult; - }; - // read() Performs a read (get, getKey, getRange, etc), in the context of the given transaction. Snapshot or RYW // reads are distingushed by the type Iter being SnapshotCache::iterator or RYWIterator. Fills in the snapshot cache // as a side effect but does not affect conflict ranges. Some (indicated) overloads of read are required to update @@ -213,36 +203,6 @@ class RYWImpl { return v; } - ACTOR template - static Future readThroughAndFlatMap(ReadYourWritesTransaction* ryw, - GetRangeAndFlatMapReq read, - Snapshot snapshot) { - if (backwards && read.end.offset > 1) { - // FIXME: Optimistically assume that this will not run into the system keys, and only reissue if the result - // actually does. 
- Key key = wait(ryw->tr.getKey(read.end, snapshot)); - if (key > ryw->getMaxReadKey()) - read.end = firstGreaterOrEqual(ryw->getMaxReadKey()); - else - read.end = KeySelector(firstGreaterOrEqual(key), key.arena()); - } - - RangeResult v = wait(ryw->tr.getRangeAndFlatMap( - read.begin, read.end, read.mapper, read.limits, snapshot, backwards ? Reverse::True : Reverse::False)); - KeyRef maxKey = ryw->getMaxReadKey(); - if (v.size() > 0) { - if (!backwards && v[v.size() - 1].key >= maxKey) { - state RangeResult _v = v; - int i = _v.size() - 2; - for (; i >= 0 && _v[i].key >= maxKey; --i) { - } - return RangeResult(RangeResultRef(VectorRef(&_v[0], i + 1), false), _v.arena()); - } - } - - return v; - } - // addConflictRange(ryw,read,result) is called after a serializable read and is responsible for adding the relevant // conflict range @@ -349,15 +309,6 @@ class RYWImpl { } } ACTOR template - static Future readWithConflictRangeThroughAndFlatMap(ReadYourWritesTransaction* ryw, - Req req, - Snapshot snapshot) { - choose { - when(typename Req::Result result = wait(readThroughAndFlatMap(ryw, req, snapshot))) { return result; } - when(wait(ryw->resetPromise.getFuture())) { throw internal_error(); } - } - } - ACTOR template static Future readWithConflictRangeSnapshot(ReadYourWritesTransaction* ryw, Req req) { state SnapshotCache::iterator it(&ryw->cache, &ryw->writes); choose { @@ -393,20 +344,6 @@ class RYWImpl { return readWithConflictRangeRYW(ryw, req, snapshot); } - template - static inline Future readWithConflictRangeAndFlatMap(ReadYourWritesTransaction* ryw, - Req const& req, - Snapshot snapshot) { - if (ryw->options.readYourWritesDisabled) { - return readWithConflictRangeThroughAndFlatMap(ryw, req, snapshot); - } else if (snapshot && ryw->options.snapshotRywEnabled <= 0) { - TEST(true); // readWithConflictRangeSnapshot not supported for getRangeAndFlatMap - throw client_invalid_operation(); - } - TEST(true); // readWithConflictRangeRYW not supported for 
getRangeAndFlatMap - throw client_invalid_operation(); - } - template static void resolveKeySelectorFromCache(KeySelector& key, Iter& it, @@ -1572,65 +1509,6 @@ Future ReadYourWritesTransaction::getRange(const KeySelector& begin return getRange(begin, end, GetRangeLimits(limit), snapshot, reverse); } -Future ReadYourWritesTransaction::getRangeAndFlatMap(KeySelector begin, - KeySelector end, - Key mapper, - GetRangeLimits limits, - Snapshot snapshot, - Reverse reverse) { - if (getDatabase()->apiVersionAtLeast(630)) { - if (specialKeys.contains(begin.getKey()) && specialKeys.begin <= end.getKey() && - end.getKey() <= specialKeys.end) { - TEST(true); // Special key space get range (FlatMap) - throw client_invalid_operation(); // Not support special keys. - } - } else { - if (begin.getKey() == LiteralStringRef("\xff\xff/worker_interfaces")) { - throw client_invalid_operation(); // Not support special keys. - } - } - - if (checkUsedDuringCommit()) { - return used_during_commit(); - } - - if (resetPromise.isSet()) - return resetPromise.getFuture().getError(); - - KeyRef maxKey = getMaxReadKey(); - if (begin.getKey() > maxKey || end.getKey() > maxKey) - return key_outside_legal_range(); - - // This optimization prevents nullptr operations from being added to the conflict range - if (limits.isReached()) { - TEST(true); // RYW range read limit 0 (FlatMap) - return RangeResult(); - } - - if (!limits.isValid()) - return range_limits_invalid(); - - if (begin.orEqual) - begin.removeOrEqual(begin.arena()); - - if (end.orEqual) - end.removeOrEqual(end.arena()); - - if (begin.offset >= end.offset && begin.getKey() >= end.getKey()) { - TEST(true); // RYW range inverted (FlatMap) - return RangeResult(); - } - - Future result = - reverse ? 
RYWImpl::readWithConflictRangeAndFlatMap( - this, RYWImpl::GetRangeAndFlatMapReq(begin, end, mapper, limits), snapshot) - : RYWImpl::readWithConflictRangeAndFlatMap( - this, RYWImpl::GetRangeAndFlatMapReq(begin, end, mapper, limits), snapshot); - - reading.add(success(result)); - return result; -} - Future>> ReadYourWritesTransaction::getAddressesForKey(const Key& key) { if (checkUsedDuringCommit()) { return used_during_commit(); diff --git a/fdbclient/ReadYourWrites.h b/fdbclient/ReadYourWrites.h index 19ce5c8775b..3ac84a76583 100644 --- a/fdbclient/ReadYourWrites.h +++ b/fdbclient/ReadYourWrites.h @@ -104,12 +104,6 @@ class ReadYourWritesTransaction final : NonCopyable, snapshot, reverse); } - Future getRangeAndFlatMap(KeySelector begin, - KeySelector end, - Key mapper, - GetRangeLimits limits, - Snapshot = Snapshot::False, - Reverse = Reverse::False) override; [[nodiscard]] Future>> getAddressesForKey(const Key& key) override; Future>> getRangeSplitPoints(const KeyRange& range, int64_t chunkSize) override; diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 6ee837b8a18..44f0ec6e2ce 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -644,8 +644,6 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi init( MAX_STORAGE_COMMIT_TIME, 120.0 ); //The max fsync stall time on the storage server and tlog before marking a disk as failed init( RANGESTREAM_LIMIT_BYTES, 2e6 ); if( randomize && BUGGIFY ) RANGESTREAM_LIMIT_BYTES = 1; init( ENABLE_CLEAR_RANGE_EAGER_READS, true ); - init( QUICK_GET_VALUE_FALLBACK, true ); - init( QUICK_GET_KEY_VALUES_FALLBACK, true ); //Wait Failure init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2; diff --git a/fdbclient/ServerKnobs.h b/fdbclient/ServerKnobs.h index b8357535c8e..6a350652045 100644 --- a/fdbclient/ServerKnobs.h +++ b/fdbclient/ServerKnobs.h @@ -585,8 +585,6 @@ class ServerKnobs : public 
KnobsImpl { double MAX_STORAGE_COMMIT_TIME; int64_t RANGESTREAM_LIMIT_BYTES; bool ENABLE_CLEAR_RANGE_EAGER_READS; - bool QUICK_GET_VALUE_FALLBACK; - bool QUICK_GET_KEY_VALUES_FALLBACK; // Wait Failure int MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS; diff --git a/fdbclient/SimpleConfigTransaction.h b/fdbclient/SimpleConfigTransaction.h index 168b1a6c294..36423b1515f 100644 --- a/fdbclient/SimpleConfigTransaction.h +++ b/fdbclient/SimpleConfigTransaction.h @@ -59,14 +59,6 @@ class SimpleConfigTransaction final : public IConfigTransaction, public FastAllo GetRangeLimits limits, Snapshot = Snapshot::False, Reverse = Reverse::False) override; - Future getRangeAndFlatMap(KeySelector begin, - KeySelector end, - Key mapper, - GetRangeLimits limits, - Snapshot = Snapshot::False, - Reverse = Reverse::False) override { - throw client_invalid_operation(); - } Future commit() override; Version getCommittedVersion() const override; void setOption(FDBTransactionOptions::Option option, Optional value = Optional()) override; diff --git a/fdbclient/StorageServerInterface.cpp b/fdbclient/StorageServerInterface.cpp index 11c9f46f562..e2d403fc2f9 100644 --- a/fdbclient/StorageServerInterface.cpp +++ b/fdbclient/StorageServerInterface.cpp @@ -152,45 +152,6 @@ void TSS_traceMismatch(TraceEvent& event, .detail("TSSReply", tssResultsString); } -// range reads and flat map -template <> -bool TSS_doCompare(const GetKeyValuesAndFlatMapReply& src, const GetKeyValuesAndFlatMapReply& tss) { - return src.more == tss.more && src.data == tss.data; -} - -template <> -const char* TSS_mismatchTraceName(const GetKeyValuesAndFlatMapRequest& req) { - return "TSSMismatchGetKeyValuesAndFlatMap"; -} - -template <> -void TSS_traceMismatch(TraceEvent& event, - const GetKeyValuesAndFlatMapRequest& req, - const GetKeyValuesAndFlatMapReply& src, - const GetKeyValuesAndFlatMapReply& tss) { - std::string ssResultsString = format("(%d)%s:\n", src.data.size(), src.more ? 
"+" : ""); - for (auto& it : src.data) { - ssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value); - } - - std::string tssResultsString = format("(%d)%s:\n", tss.data.size(), tss.more ? "+" : ""); - for (auto& it : tss.data) { - tssResultsString += "\n" + it.key.printable() + "=" + traceChecksumValue(it.value); - } - event - .detail( - "Begin", - format("%s%s:%d", req.begin.orEqual ? "=" : "", req.begin.getKey().printable().c_str(), req.begin.offset)) - .detail("End", - format("%s%s:%d", req.end.orEqual ? "=" : "", req.end.getKey().printable().c_str(), req.end.offset)) - .detail("Version", req.version) - .detail("Limit", req.limit) - .detail("LimitBytes", req.limitBytes) - .setMaxFieldLength(FLOW_KNOBS->TSS_LARGE_TRACE_SIZE * 4 / 10) - .detail("SSReply", ssResultsString) - .detail("TSSReply", tssResultsString); -} - // streaming range reads template <> bool TSS_doCompare(const GetKeyValuesStreamReply& src, const GetKeyValuesStreamReply& tss) { @@ -395,12 +356,6 @@ void TSSMetrics::recordLatency(const GetKeyValuesRequest& req, double ssLatency, TSSgetKeyValuesLatency.addSample(tssLatency); } -template <> -void TSSMetrics::recordLatency(const GetKeyValuesAndFlatMapRequest& req, double ssLatency, double tssLatency) { - SSgetKeyValuesAndFlatMapLatency.addSample(ssLatency); - TSSgetKeyValuesAndFlatMapLatency.addSample(tssLatency); -} - template <> void TSSMetrics::recordLatency(const WatchValueRequest& req, double ssLatency, double tssLatency) {} diff --git a/fdbclient/StorageServerInterface.h b/fdbclient/StorageServerInterface.h index ba912adbb1c..1671abe67d4 100644 --- a/fdbclient/StorageServerInterface.h +++ b/fdbclient/StorageServerInterface.h @@ -22,7 +22,6 @@ #define FDBCLIENT_STORAGESERVERINTERFACE_H #pragma once -#include #include "fdbclient/FDBTypes.h" #include "fdbrpc/Locality.h" #include "fdbrpc/QueueModel.h" @@ -66,7 +65,6 @@ struct StorageServerInterface { // Throws a wrong_shard_server if the keys in the request or result depend 
on data outside this server OR if a large // selector offset prevents all data from being read in one range read RequestStream getKeyValues; - RequestStream getKeyValuesAndFlatMap; RequestStream getShardState; RequestStream waitMetrics; @@ -131,8 +129,6 @@ struct StorageServerInterface { RequestStream(getValue.getEndpoint().getAdjustedEndpoint(15)); changeFeedPop = RequestStream(getValue.getEndpoint().getAdjustedEndpoint(16)); - getKeyValuesAndFlatMap = - RequestStream(getValue.getEndpoint().getAdjustedEndpoint(17)); } } else { ASSERT(Ar::isDeserializing); @@ -178,7 +174,6 @@ struct StorageServerInterface { streams.push_back(changeFeedStream.getReceiver()); streams.push_back(overlappingChangeFeeds.getReceiver()); streams.push_back(changeFeedPop.getReceiver()); - streams.push_back(getKeyValuesAndFlatMap.getReceiver(TaskPriority::LoadBalancedEndpoint)); FlowTransport::transport().addEndpoints(streams); } }; @@ -301,9 +296,6 @@ struct GetKeyValuesRequest : TimedRequest { SpanID spanContext; Arena arena; KeySelectorRef begin, end; - // This is a dummy field there has never been used. 
- // TODO: Get rid of this by constexpr or other template magic in getRange - KeyRef mapper = KeyRef(); Version version; // or latestVersion int limit, limitBytes; bool isFetchKeys; @@ -318,43 +310,6 @@ struct GetKeyValuesRequest : TimedRequest { } }; -struct GetKeyValuesAndFlatMapReply : public LoadBalancedReply { - constexpr static FileIdentifier file_identifier = 1783067; - Arena arena; - VectorRef data; - Version version; // useful when latestVersion was requested - bool more; - bool cached = false; - - GetKeyValuesAndFlatMapReply() : version(invalidVersion), more(false), cached(false) {} - - template - void serialize(Ar& ar) { - serializer(ar, LoadBalancedReply::penalty, LoadBalancedReply::error, data, version, more, cached, arena); - } -}; - -struct GetKeyValuesAndFlatMapRequest : TimedRequest { - constexpr static FileIdentifier file_identifier = 6795747; - SpanID spanContext; - Arena arena; - KeySelectorRef begin, end; - KeyRef mapper; - Version version; // or latestVersion - int limit, limitBytes; - bool isFetchKeys; - Optional tags; - Optional debugID; - ReplyPromise reply; - - GetKeyValuesAndFlatMapRequest() : isFetchKeys(false) {} - template - void serialize(Ar& ar) { - serializer( - ar, begin, end, mapper, version, limit, limitBytes, isFetchKeys, tags, debugID, reply, spanContext, arena); - } -}; - struct GetKeyValuesStreamReply : public ReplyPromiseStreamReply { constexpr static FileIdentifier file_identifier = 1783066; Arena arena; diff --git a/fdbclient/ThreadSafeTransaction.cpp b/fdbclient/ThreadSafeTransaction.cpp index ace522cac02..3810d081910 100644 --- a/fdbclient/ThreadSafeTransaction.cpp +++ b/fdbclient/ThreadSafeTransaction.cpp @@ -257,23 +257,6 @@ ThreadFuture ThreadSafeTransaction::getRange(const KeySelectorRef& }); } -ThreadFuture ThreadSafeTransaction::getRangeAndFlatMap(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& mapper, - GetRangeLimits limits, - bool snapshot, - bool reverse) { - KeySelector b = begin; 
- KeySelector e = end; - Key h = mapper; - - ISingleThreadTransaction* tr = this->tr; - return onMainThread([tr, b, e, h, limits, snapshot, reverse]() -> Future { - tr->checkDeferredError(); - return tr->getRangeAndFlatMap(b, e, h, limits, Snapshot{ snapshot }, Reverse{ reverse }); - }); -} - ThreadFuture>> ThreadSafeTransaction::getAddressesForKey(const KeyRef& key) { Key k = key; diff --git a/fdbclient/ThreadSafeTransaction.h b/fdbclient/ThreadSafeTransaction.h index 85ae27c1fe9..75faa677458 100644 --- a/fdbclient/ThreadSafeTransaction.h +++ b/fdbclient/ThreadSafeTransaction.h @@ -106,12 +106,6 @@ class ThreadSafeTransaction : public ITransaction, ThreadSafeReferenceCounted getRangeAndFlatMap(const KeySelectorRef& begin, - const KeySelectorRef& end, - const StringRef& mapper, - GetRangeLimits limits, - bool snapshot, - bool reverse) override; ThreadFuture>> getAddressesForKey(const KeyRef& key) override; ThreadFuture> getVersionstamp() override; ThreadFuture getEstimatedRangeSizeBytes(const KeyRangeRef& keys) override; diff --git a/fdbrpc/TSSComparison.h b/fdbrpc/TSSComparison.h index 54114c2db80..af5080af6ff 100644 --- a/fdbrpc/TSSComparison.h +++ b/fdbrpc/TSSComparison.h @@ -51,12 +51,10 @@ struct TSSMetrics : ReferenceCounted, NonCopyable { ContinuousSample SSgetValueLatency; ContinuousSample SSgetKeyLatency; ContinuousSample SSgetKeyValuesLatency; - ContinuousSample SSgetKeyValuesAndFlatMapLatency; ContinuousSample TSSgetValueLatency; ContinuousSample TSSgetKeyLatency; ContinuousSample TSSgetKeyValuesLatency; - ContinuousSample TSSgetKeyValuesAndFlatMapLatency; std::unordered_map ssErrorsByCode; std::unordered_map tssErrorsByCode; @@ -105,8 +103,7 @@ struct TSSMetrics : ReferenceCounted, NonCopyable { : cc("TSSClientMetrics"), requests("Requests", cc), streamComparisons("StreamComparisons", cc), ssErrors("SSErrors", cc), tssErrors("TSSErrors", cc), tssTimeouts("TSSTimeouts", cc), mismatches("Mismatches", cc), SSgetValueLatency(1000), SSgetKeyLatency(1000), 
SSgetKeyValuesLatency(1000), - SSgetKeyValuesAndFlatMapLatency(1000), TSSgetValueLatency(1000), TSSgetKeyLatency(1000), - TSSgetKeyValuesLatency(1000), TSSgetKeyValuesAndFlatMapLatency(1000) {} + TSSgetValueLatency(1000), TSSgetKeyLatency(1000), TSSgetKeyValuesLatency(1000) {} }; template diff --git a/fdbserver/CMakeLists.txt b/fdbserver/CMakeLists.txt index 29f01a15953..ca7d7d6db5b 100644 --- a/fdbserver/CMakeLists.txt +++ b/fdbserver/CMakeLists.txt @@ -208,7 +208,6 @@ set(FDBSERVER_SRCS workloads/MemoryLifetime.actor.cpp workloads/MetricLogging.actor.cpp workloads/MutationLogReaderCorrectness.actor.cpp - workloads/IndexPrefetchDemo.actor.cpp workloads/ParallelRestore.actor.cpp workloads/Performance.actor.cpp workloads/Ping.actor.cpp diff --git a/fdbserver/storageserver.actor.cpp b/fdbserver/storageserver.actor.cpp index 35a8ad0eaf4..c7bb89afc6f 100644 --- a/fdbserver/storageserver.actor.cpp +++ b/fdbserver/storageserver.actor.cpp @@ -42,7 +42,6 @@ #include "fdbclient/NativeAPI.actor.h" #include "fdbclient/Notified.h" #include "fdbclient/StatusClient.h" -#include "fdbclient/Tuple.h" #include "fdbclient/SystemData.h" #include "fdbclient/TransactionLineage.h" #include "fdbclient/VersionedMap.h" @@ -780,9 +779,8 @@ struct StorageServer { struct Counters { CounterCollection cc; - Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, getRangeAndFlatMapQueries, - getRangeStreamQueries, finishedQueries, lowPriorityQueries, rowsQueried, bytesQueried, watchQueries, - emptyQueries; + Counter allQueries, getKeyQueries, getValueQueries, getRangeQueries, getRangeStreamQueries, finishedQueries, + lowPriorityQueries, rowsQueried, bytesQueried, watchQueries, emptyQueries; // Bytes of the mutations that have been added to the memory of the storage server. When the data is durable // and cleared from the memory, we do not subtract it but add it to bytesDurable. 
@@ -809,10 +807,6 @@ struct StorageServer { Counter wrongShardServer; Counter fetchedVersions; Counter fetchesFromLogs; - // The following counters measure how many of lookups in the getRangeAndFlatMapQueries are effective. "Miss" - // means fallback if fallback is enabled, otherwise means failure (so that another layer could implement - // fallback). - Counter quickGetValueHit, quickGetValueMiss, quickGetKeyValuesHit, quickGetKeyValuesMiss; LatencySample readLatencySample; LatencyBands readLatencyBands; @@ -820,25 +814,22 @@ struct StorageServer { Counters(StorageServer* self) : cc("StorageServer", self->thisServerID.toString()), allQueries("QueryQueue", cc), getKeyQueries("GetKeyQueries", cc), getValueQueries("GetValueQueries", cc), - getRangeQueries("GetRangeQueries", cc), getRangeAndFlatMapQueries("GetRangeAndFlatMapQueries", cc), - getRangeStreamQueries("GetRangeStreamQueries", cc), finishedQueries("FinishedQueries", cc), - lowPriorityQueries("LowPriorityQueries", cc), rowsQueried("RowsQueried", cc), - bytesQueried("BytesQueried", cc), watchQueries("WatchQueries", cc), emptyQueries("EmptyQueries", cc), - bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), bytesFetched("BytesFetched", cc), - mutationBytes("MutationBytes", cc), sampledBytesCleared("SampledBytesCleared", cc), - kvFetched("KVFetched", cc), mutations("Mutations", cc), setMutations("SetMutations", cc), - clearRangeMutations("ClearRangeMutations", cc), atomicMutations("AtomicMutations", cc), - updateBatches("UpdateBatches", cc), updateVersions("UpdateVersions", cc), loops("Loops", cc), - fetchWaitingMS("FetchWaitingMS", cc), fetchWaitingCount("FetchWaitingCount", cc), - fetchExecutingMS("FetchExecutingMS", cc), fetchExecutingCount("FetchExecutingCount", cc), - readsRejected("ReadsRejected", cc), wrongShardServer("WrongShardServer", cc), - fetchedVersions("FetchedVersions", cc), fetchesFromLogs("FetchesFromLogs", cc), - quickGetValueHit("QuickGetValueHit", cc), 
quickGetValueMiss("QuickGetValueMiss", cc), - quickGetKeyValuesHit("QuickGetKeyValuesHit", cc), quickGetKeyValuesMiss("QuickGetKeyValuesMiss", cc), - readLatencySample("ReadLatencyMetrics", - self->thisServerID, - SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, - SERVER_KNOBS->LATENCY_SAMPLE_SIZE), + getRangeQueries("GetRangeQueries", cc), getRangeStreamQueries("GetRangeStreamQueries", cc), + finishedQueries("FinishedQueries", cc), lowPriorityQueries("LowPriorityQueries", cc), + rowsQueried("RowsQueried", cc), bytesQueried("BytesQueried", cc), watchQueries("WatchQueries", cc), + emptyQueries("EmptyQueries", cc), bytesInput("BytesInput", cc), bytesDurable("BytesDurable", cc), + bytesFetched("BytesFetched", cc), mutationBytes("MutationBytes", cc), + sampledBytesCleared("SampledBytesCleared", cc), kvFetched("KVFetched", cc), mutations("Mutations", cc), + setMutations("SetMutations", cc), clearRangeMutations("ClearRangeMutations", cc), + atomicMutations("AtomicMutations", cc), updateBatches("UpdateBatches", cc), + updateVersions("UpdateVersions", cc), loops("Loops", cc), fetchWaitingMS("FetchWaitingMS", cc), + fetchWaitingCount("FetchWaitingCount", cc), fetchExecutingMS("FetchExecutingMS", cc), + fetchExecutingCount("FetchExecutingCount", cc), readsRejected("ReadsRejected", cc), + wrongShardServer("WrongShardServer", cc), fetchedVersions("FetchedVersions", cc), + fetchesFromLogs("FetchesFromLogs", cc), readLatencySample("ReadLatencyMetrics", + self->thisServerID, + SERVER_KNOBS->LATENCY_METRICS_LOGGING_INTERVAL, + SERVER_KNOBS->LATENCY_SAMPLE_SIZE), readLatencyBands("ReadLatencyBands", self->thisServerID, SERVER_KNOBS->STORAGE_LOGGING_DELAY) { specialCounter(cc, "LastTLogVersion", [self]() { return self->lastTLogVersion; }); specialCounter(cc, "Version", [self]() { return self->version.get(); }); @@ -1994,37 +1985,6 @@ void merge(Arena& arena, } } -ACTOR Future> quickGetValue(StorageServer* data, StringRef key, Version version) { - if (data->shards[key]->isReadable()) 
{ - try { - // TODO: Use a lower level API may be better? Or tweak priorities? - GetValueRequest req(Span().context, key, version, Optional(), Optional()); - data->actors.add(data->readGuard(req, getValueQ)); - GetValueReply reply = wait(req.reply.getFuture()); - ++data->counters.quickGetValueHit; - return reply.value; - } catch (Error& e) { - // Fallback. - } - } else { - // Fallback. - } - - ++data->counters.quickGetValueMiss; - if (SERVER_KNOBS->QUICK_GET_VALUE_FALLBACK) { - state Transaction tr(data->cx); - tr.setVersion(version); - // TODO: is DefaultPromiseEndpoint the best priority for this? - tr.info.taskID = TaskPriority::DefaultPromiseEndpoint; - Future> valueFuture = tr.get(key, Snapshot::True); - // TODO: async in case it needs to read from other servers. - state Optional valueOption = wait(valueFuture); - return valueOption; - } else { - throw quick_get_value_miss(); - } -}; - // If limit>=0, it returns the first rows in the range (sorted ascending), otherwise the last rows (sorted descending). // readRange has O(|result|) + O(log |data|) cost ACTOR Future readRange(StorageServer* data, @@ -2510,440 +2470,6 @@ ACTOR Future getKeyValuesQ(StorageServer* data, GetKeyValuesRequest req) return Void(); } -ACTOR Future quickGetKeyValues(StorageServer* data, StringRef prefix, Version version) { - try { - // TODO: Use a lower level API may be better? Or tweak priorities? - GetKeyValuesRequest req; - req.spanContext = Span().context; - req.arena = Arena(); - req.begin = firstGreaterOrEqual(KeyRef(req.arena, prefix)); - req.end = firstGreaterOrEqual(strinc(prefix, req.arena)); - req.version = version; - - data->actors.add(data->readGuard(req, getKeyValuesQ)); - GetKeyValuesReply reply = wait(req.reply.getFuture()); - ++data->counters.quickGetKeyValuesHit; - - // Convert GetKeyValuesReply to RangeResult. - return RangeResult(RangeResultRef(reply.data, reply.more), reply.arena); - } catch (Error& e) { - // Fallback. 
- } - - ++data->counters.quickGetKeyValuesMiss; - if (SERVER_KNOBS->QUICK_GET_KEY_VALUES_FALLBACK) { - state Transaction tr(data->cx); - tr.setVersion(version); - // TODO: is DefaultPromiseEndpoint the best priority for this? - tr.info.taskID = TaskPriority::DefaultPromiseEndpoint; - Future rangeResultFuture = tr.getRange(prefixRange(prefix), Snapshot::True); - // TODO: async in case it needs to read from other servers. - RangeResult rangeResult = wait(rangeResultFuture); - return rangeResult; - } else { - throw quick_get_key_values_miss(); - } -}; - -Key constructMappedKey(KeyValueRef* keyValue, Tuple& mappedKeyFormatTuple, bool& isRangeQuery) { - // Lazily parse key and/or value to tuple because they may not need to be a tuple if not used. - Optional keyTuple; - Optional valueTuple; - - Tuple mappedKeyTuple; - for (int i = 0; i < mappedKeyFormatTuple.size(); i++) { - Tuple::ElementType type = mappedKeyFormatTuple.getType(i); - if (type == Tuple::BYTES || type == Tuple::UTF8) { - std::string s = mappedKeyFormatTuple.getString(i).toString(); - auto sz = s.size(); - - // Handle escape. - bool escaped = false; - size_t p = 0; - while (true) { - size_t found = s.find("{{", p); - if (found == std::string::npos) { - break; - } - s.replace(found, 2, "{"); - p += 1; - escaped = true; - } - p = 0; - while (true) { - size_t found = s.find("}}", p); - if (found == std::string::npos) { - break; - } - s.replace(found, 2, "}"); - p += 1; - escaped = true; - } - if (escaped) { - // If the element uses escape, cope the escaped version. - mappedKeyTuple.append(s); - } - // {K[??]} or {V[??]} - else if (sz > 5 && s[0] == '{' && (s[1] == 'K' || s[1] == 'V') && s[2] == '[' && s[sz - 2] == ']' && - s[sz - 1] == '}') { - int idx; - try { - idx = std::stoi(s.substr(3, sz - 5)); - } catch (std::exception& e) { - throw mapper_bad_index(); - } - Tuple* referenceTuple; - if (s[1] == 'K') { - // Use keyTuple as reference. 
- if (!keyTuple.present()) { - // May throw exception if the key is not parsable as a tuple. - keyTuple = Tuple::unpack(keyValue->key); - } - referenceTuple = &keyTuple.get(); - } else if (s[1] == 'V') { - // Use valueTuple as reference. - if (!valueTuple.present()) { - // May throw exception if the value is not parsable as a tuple. - valueTuple = Tuple::unpack(keyValue->value); - } - referenceTuple = &valueTuple.get(); - } else { - ASSERT(false); - throw internal_error(); - } - - if (idx < 0 || idx >= referenceTuple->size()) { - throw mapper_bad_index(); - } - mappedKeyTuple.append(referenceTuple->subTuple(idx, idx + 1)); - } else if (s == "{...}") { - // Range query. - if (i != mappedKeyFormatTuple.size() - 1) { - // It must be the last element of the mapper tuple - throw mapper_bad_range_decriptor(); - } - // Every record will try to set it. It's ugly, but not wrong. - isRangeQuery = true; - // Do not add it to the mapped key. - } else { - // If the element is a string but neither escaped nor descriptors, just copy it. - mappedKeyTuple.append(mappedKeyFormatTuple.subTuple(i, i + 1)); - } - } else { - // If the element not a string, just copy it. 
- mappedKeyTuple.append(mappedKeyFormatTuple.subTuple(i, i + 1)); - } - } - return mappedKeyTuple.getDataAsStandalone(); -} - -TEST_CASE("/fdbserver/storageserver/constructMappedKey") { - Key key = Tuple().append("key-0"_sr).append("key-1"_sr).append("key-2"_sr).getDataAsStandalone(); - Value value = Tuple().append("value-0"_sr).append("value-1"_sr).append("value-2"_sr).getDataAsStandalone(); - state KeyValueRef kvr(key, value); - { - Tuple mapperTuple = Tuple() - .append("normal"_sr) - .append("{{escaped}}"_sr) - .append("{K[2]}"_sr) - .append("{V[0]}"_sr) - .append("{...}"_sr); - - bool isRangeQuery = false; - Key mappedKey = constructMappedKey(&kvr, mapperTuple, isRangeQuery); - - Key expectedMappedKey = Tuple() - .append("normal"_sr) - .append("{escaped}"_sr) - .append("key-2"_sr) - .append("value-0"_sr) - .getDataAsStandalone(); - // std::cout << printable(mappedKey) << " == " << printable(expectedMappedKey) << std::endl; - ASSERT(mappedKey.compare(expectedMappedKey) == 0); - ASSERT(isRangeQuery == true); - } - { - Tuple mapperTuple = Tuple().append("{{{{}}"_sr).append("}}"_sr); - - bool isRangeQuery = false; - Key mappedKey = constructMappedKey(&kvr, mapperTuple, isRangeQuery); - - Key expectedMappedKey = Tuple().append("{{}"_sr).append("}"_sr).getDataAsStandalone(); - // std::cout << printable(mappedKey) << " == " << printable(expectedMappedKey) << std::endl; - ASSERT(mappedKey.compare(expectedMappedKey) == 0); - ASSERT(isRangeQuery == false); - } - { - Tuple mapperTuple = Tuple().append("{{{{}}"_sr).append("}}"_sr); - - bool isRangeQuery = false; - Key mappedKey = constructMappedKey(&kvr, mapperTuple, isRangeQuery); - - Key expectedMappedKey = Tuple().append("{{}"_sr).append("}"_sr).getDataAsStandalone(); - // std::cout << printable(mappedKey) << " == " << printable(expectedMappedKey) << std::endl; - ASSERT(mappedKey.compare(expectedMappedKey) == 0); - ASSERT(isRangeQuery == false); - } - { - Tuple mapperTuple = Tuple().append("{K[100]}"_sr); - bool 
isRangeQuery = false; - state bool throwException = false; - try { - Key mappedKey = constructMappedKey(&kvr, mapperTuple, isRangeQuery); - } catch (Error& e) { - ASSERT(e.code() == error_code_mapper_bad_index); - throwException = true; - } - ASSERT(throwException); - } - { - Tuple mapperTuple = Tuple().append("{...}"_sr).append("last-element"_sr); - bool isRangeQuery = false; - state bool throwException2 = false; - try { - Key mappedKey = constructMappedKey(&kvr, mapperTuple, isRangeQuery); - } catch (Error& e) { - ASSERT(e.code() == error_code_mapper_bad_range_decriptor); - throwException2 = true; - } - ASSERT(throwException2); - } - { - Tuple mapperTuple = Tuple().append("{K[not-a-number]}"_sr); - bool isRangeQuery = false; - state bool throwException3 = false; - try { - Key mappedKey = constructMappedKey(&kvr, mapperTuple, isRangeQuery); - } catch (Error& e) { - ASSERT(e.code() == error_code_mapper_bad_index); - throwException3 = true; - } - ASSERT(throwException3); - } - return Void(); -} - -ACTOR Future flatMap(StorageServer* data, GetKeyValuesReply input, StringRef mapper) { - state GetKeyValuesAndFlatMapReply result; - result.version = input.version; - result.more = input.more; - result.cached = input.cached; - result.arena.dependsOn(input.arena); - - result.data.reserve(result.arena, input.data.size()); - state bool isRangeQuery = false; - state Tuple mappedKeyFormatTuple = Tuple::unpack(mapper); - state KeyValueRef* it = input.data.begin(); - for (; it != input.data.end(); it++) { - state StringRef key = it->key; - - state Key mappedKey = constructMappedKey(it, mappedKeyFormatTuple, isRangeQuery); - // Make sure the mappedKey is always available, so that it's good even we want to get key asynchronously. - result.arena.dependsOn(mappedKey.arena()); - - if (isRangeQuery) { - // Use the mappedKey as the prefix of the range query. 
- RangeResult rangeResult = wait(quickGetKeyValues(data, mappedKey, input.version)); - - if (rangeResult.more) { - // Probably the fan out is too large. The user should use the old way to query. - throw quick_get_key_values_has_more(); - } - result.arena.dependsOn(rangeResult.arena()); - for (int i = 0; i < rangeResult.size(); i++) { - result.data.emplace_back(result.arena, rangeResult[i].key, rangeResult[i].value); - } - } else { - Optional valueOption = wait(quickGetValue(data, mappedKey, input.version)); - - if (valueOption.present()) { - Value value = valueOption.get(); - result.arena.dependsOn(value.arena()); - result.data.emplace_back(result.arena, mappedKey, value); - } else { - // TODO: Shall we throw exception if the key doesn't exist or the range is empty? - } - } - } - return result; -} - -// Most of the actor is copied from getKeyValuesQ. I tried to use templates but things become nearly impossible after -// combining actor shenanigans with template shenanigans. -ACTOR Future getKeyValuesAndFlatMapQ(StorageServer* data, GetKeyValuesAndFlatMapRequest req) -// Throws a wrong_shard_server if the keys in the request or result depend on data outside this server OR if a large -// selector offset prevents all data from being read in one range read -{ - state Span span("SS:getKeyValuesAndFlatMap"_loc, { req.spanContext }); - state int64_t resultSize = 0; - state IKeyValueStore::ReadType type = - req.isFetchKeys ? 
IKeyValueStore::ReadType::FETCH : IKeyValueStore::ReadType::NORMAL; - getCurrentLineage()->modify(&TransactionLineage::txID) = req.spanContext.first(); - - ++data->counters.getRangeAndFlatMapQueries; - ++data->counters.allQueries; - ++data->readQueueSizeMetric; - data->maxQueryQueue = std::max( - data->maxQueryQueue, data->counters.allQueries.getValue() - data->counters.finishedQueries.getValue()); - - // Active load balancing runs at a very high priority (to obtain accurate queue lengths) - // so we need to downgrade here - if (SERVER_KNOBS->FETCH_KEYS_LOWER_PRIORITY && req.isFetchKeys) { - wait(delay(0, TaskPriority::FetchKeys)); - } else { - wait(data->getQueryDelay()); - } - - try { - if (req.debugID.present()) - g_traceBatch.addEvent( - "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndFlatMap.Before"); - state Version version = wait(waitForVersion(data, req.version, span.context)); - - state uint64_t changeCounter = data->shardChangeCounter; - // try { - state KeyRange shard = getShardKeyRange(data, req.begin); - - if (req.debugID.present()) - g_traceBatch.addEvent( - "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndFlatMap.AfterVersion"); - //.detail("ShardBegin", shard.begin).detail("ShardEnd", shard.end); - //} catch (Error& e) { TraceEvent("WrongShardServer", data->thisServerID).detail("Begin", - // req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("Shard", - //"None").detail("In", "getKeyValuesAndFlatMap>getShardKeyRange"); throw e; } - - if (!selectorInRange(req.end, shard) && !(req.end.isFirstGreaterOrEqual() && req.end.getKey() == shard.end)) { - // TraceEvent("WrongShardServer1", data->thisServerID).detail("Begin", - // req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("ShardBegin", - // shard.begin).detail("ShardEnd", shard.end).detail("In", "getKeyValuesAndFlatMap>checkShardExtents"); - throw wrong_shard_server(); - 
} - - state int offset1 = 0; - state int offset2; - state Future fBegin = req.begin.isFirstGreaterOrEqual() - ? Future(req.begin.getKey()) - : findKey(data, req.begin, version, shard, &offset1, span.context, type); - state Future fEnd = req.end.isFirstGreaterOrEqual() - ? Future(req.end.getKey()) - : findKey(data, req.end, version, shard, &offset2, span.context, type); - state Key begin = wait(fBegin); - state Key end = wait(fEnd); - - if (req.debugID.present()) - g_traceBatch.addEvent( - "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndFlatMap.AfterKeys"); - //.detail("Off1",offset1).detail("Off2",offset2).detail("ReqBegin",req.begin.getKey()).detail("ReqEnd",req.end.getKey()); - - // Offsets of zero indicate begin/end keys in this shard, which obviously means we can answer the query - // An end offset of 1 is also OK because the end key is exclusive, so if the first key of the next shard is the - // end the last actual key returned must be from this shard. A begin offset of 1 is also OK because then either - // begin is past end or equal to end (so the result is definitely empty) - if ((offset1 && offset1 != 1) || (offset2 && offset2 != 1)) { - TEST(true); // wrong_shard_server due to offset in getKeyValuesAndFlatMapQ - // We could detect when offset1 takes us off the beginning of the database or offset2 takes us off the end, - // and return a clipped range rather than an error (since that is what the NativeAPI.getRange will do anyway - // via its "slow path"), but we would have to add some flags to the response to encode whether we went off - // the beginning and the end, since it needs that information. 
- //TraceEvent("WrongShardServer2", data->thisServerID).detail("Begin", req.begin.toString()).detail("End", req.end.toString()).detail("Version", version).detail("ShardBegin", shard.begin).detail("ShardEnd", shard.end).detail("In", "getKeyValuesAndFlatMap>checkOffsets").detail("BeginKey", begin).detail("EndKey", end).detail("BeginOffset", offset1).detail("EndOffset", offset2); - throw wrong_shard_server(); - } - - if (begin >= end) { - if (req.debugID.present()) - g_traceBatch.addEvent( - "TransactionDebug", req.debugID.get().first(), "storageserver.getKeyValuesAndFlatMap.Send"); - //.detail("Begin",begin).detail("End",end); - - GetKeyValuesAndFlatMapReply none; - none.version = version; - none.more = false; - none.penalty = data->getPenalty(); - - data->checkChangeCounter(changeCounter, - KeyRangeRef(std::min(req.begin.getKey(), req.end.getKey()), - std::max(req.begin.getKey(), req.end.getKey()))); - req.reply.send(none); - } else { - state int remainingLimitBytes = req.limitBytes; - - GetKeyValuesReply _r = wait( - readRange(data, version, KeyRangeRef(begin, end), req.limit, &remainingLimitBytes, span.context, type)); - - // Map the scanned range to another list of keys and look up. - state GetKeyValuesAndFlatMapReply r = wait(flatMap(data, _r, req.mapper)); - - if (req.debugID.present()) - g_traceBatch.addEvent("TransactionDebug", - req.debugID.get().first(), - "storageserver.getKeyValuesAndFlatMap.AfterReadRange"); - //.detail("Begin",begin).detail("End",end).detail("SizeOf",r.data.size()); - data->checkChangeCounter( - changeCounter, - KeyRangeRef(std::min(begin, std::min(req.begin.getKey(), req.end.getKey())), - std::max(end, std::max(req.begin.getKey(), req.end.getKey())))); - if (EXPENSIVE_VALIDATION) { - // TODO: Only mapped keys are returned, which are not supposed to be in the range. - // for (int i = 0; i < r.data.size(); i++) - // ASSERT(r.data[i].key >= begin && r.data[i].key < end); - // TODO: GetKeyValuesWithFlatMapRequest doesn't respect limit yet. 
- // ASSERT(r.data.size() <= std::abs(req.limit)); - } - - /*for( int i = 0; i < r.data.size(); i++ ) { - StorageMetrics m; - m.bytesPerKSecond = r.data[i].expectedSize(); - m.iosPerKSecond = 1; //FIXME: this should be 1/r.data.size(), but we cannot do that because it is an int - data->metrics.notify(r.data[i].key, m); - }*/ - - // For performance concerns, the cost of a range read is billed to the start key and end key of the range. - int64_t totalByteSize = 0; - for (int i = 0; i < r.data.size(); i++) { - totalByteSize += r.data[i].expectedSize(); - } - if (totalByteSize > 0 && SERVER_KNOBS->READ_SAMPLING_ENABLED) { - int64_t bytesReadPerKSecond = std::max(totalByteSize, SERVER_KNOBS->EMPTY_READ_PENALTY) / 2; - data->metrics.notifyBytesReadPerKSecond(r.data[0].key, bytesReadPerKSecond); - data->metrics.notifyBytesReadPerKSecond(r.data[r.data.size() - 1].key, bytesReadPerKSecond); - } - - r.penalty = data->getPenalty(); - req.reply.send(r); - - resultSize = req.limitBytes - remainingLimitBytes; - data->counters.bytesQueried += resultSize; - data->counters.rowsQueried += r.data.size(); - if (r.data.size() == 0) { - ++data->counters.emptyQueries; - } - } - } catch (Error& e) { - if (!canReplyWith(e)) - throw; - data->sendErrorWithPenalty(req.reply, e, data->getPenalty()); - } - - data->transactionTagCounter.addRequest(req.tags, resultSize); - ++data->counters.finishedQueries; - --data->readQueueSizeMetric; - - double duration = g_network->timer() - req.requestTime(); - data->counters.readLatencySample.addMeasurement(duration); - if (data->latencyBandConfig.present()) { - int maxReadBytes = - data->latencyBandConfig.get().readConfig.maxReadBytes.orDefault(std::numeric_limits::max()); - int maxSelectorOffset = - data->latencyBandConfig.get().readConfig.maxKeySelectorOffset.orDefault(std::numeric_limits::max()); - data->counters.readLatencyBands.addMeasurement(duration, - resultSize > maxReadBytes || - abs(req.begin.offset) > maxSelectorOffset || - abs(req.end.offset) 
> maxSelectorOffset); - } - - return Void(); -} - ACTOR Future getKeyValuesStreamQ(StorageServer* data, GetKeyValuesStreamRequest req) // Throws a wrong_shard_server if the keys in the request or result depend on data outside this server OR if a large // selector offset prevents all data from being read in one range read @@ -6164,20 +5690,6 @@ ACTOR Future serveGetKeyValuesRequests(StorageServer* self, FutureStream serveGetKeyValuesAndFlatMapRequests( - StorageServer* self, - FutureStream getKeyValuesAndFlatMap) { - // TODO: Is it fine to keep TransactionLineage::Operation::GetKeyValues here? - getCurrentLineage()->modify(&TransactionLineage::operation) = TransactionLineage::Operation::GetKeyValues; - loop { - GetKeyValuesAndFlatMapRequest req = waitNext(getKeyValuesAndFlatMap); - - // Warning: This code is executed at extremely high priority (TaskPriority::LoadBalancedEndpoint), so downgrade - // before doing real work - self->actors.add(self->readGuard(req, getKeyValuesAndFlatMapQ)); - } -} - ACTOR Future serveGetKeyValuesStreamRequests(StorageServer* self, FutureStream getKeyValuesStream) { loop { @@ -6377,7 +5889,6 @@ ACTOR Future storageServerCore(StorageServer* self, StorageServerInterface self->actors.add(checkBehind(self)); self->actors.add(serveGetValueRequests(self, ssi.getValue.getFuture())); self->actors.add(serveGetKeyValuesRequests(self, ssi.getKeyValues.getFuture())); - self->actors.add(serveGetKeyValuesAndFlatMapRequests(self, ssi.getKeyValuesAndFlatMap.getFuture())); self->actors.add(serveGetKeyValuesStreamRequests(self, ssi.getKeyValuesStream.getFuture())); self->actors.add(serveGetKeyRequests(self, ssi.getKey.getFuture())); self->actors.add(serveWatchValueRequests(self, ssi.watchValue.getFuture())); diff --git a/fdbserver/worker.actor.cpp b/fdbserver/worker.actor.cpp index 1e50036fd87..c42fcff0823 100644 --- a/fdbserver/worker.actor.cpp +++ b/fdbserver/worker.actor.cpp @@ -1097,7 +1097,6 @@ ACTOR Future storageServerRollbackRebooter(std::set 
storageServerRollbackRebooter(std::set(), Reference(nullptr)); @@ -1480,7 +1478,6 @@ ACTOR Future workerServer(Reference connRecord, DUMPTOKEN(recruited.getKeyValueStoreType); DUMPTOKEN(recruited.watchValue); DUMPTOKEN(recruited.getKeyValuesStream); - DUMPTOKEN(recruited.getKeyValuesAndFlatMap); Promise recovery; Future f = storageServer(kv, recruited, dbInfo, folder, recovery, connRecord); @@ -1577,7 +1574,6 @@ ACTOR Future workerServer(Reference connRecord, DUMPTOKEN(recruited.getValue); DUMPTOKEN(recruited.getKey); DUMPTOKEN(recruited.getKeyValues); - DUMPTOKEN(recruited.getKeyValuesAndFlatMap); DUMPTOKEN(recruited.getShardState); DUMPTOKEN(recruited.waitMetrics); DUMPTOKEN(recruited.splitMetrics); @@ -1935,7 +1931,6 @@ ACTOR Future workerServer(Reference connRecord, DUMPTOKEN(recruited.getKeyValueStoreType); DUMPTOKEN(recruited.watchValue); DUMPTOKEN(recruited.getKeyValuesStream); - DUMPTOKEN(recruited.getKeyValuesAndFlatMap); // printf("Recruited as storageServer\n"); std::string filename = diff --git a/fdbserver/workloads/IndexPrefetchDemo.actor.cpp b/fdbserver/workloads/IndexPrefetchDemo.actor.cpp deleted file mode 100644 index a614e80462d..00000000000 --- a/fdbserver/workloads/IndexPrefetchDemo.actor.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/* - * IndexPrefetchDemo.actor.cpp - * - * This source file is part of the FoundationDB open source project - * - * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include "fdbrpc/simulator.h" -#include "fdbclient/MutationLogReader.actor.h" -#include "fdbclient/Tuple.h" -#include "fdbserver/workloads/workloads.actor.h" -#include "flow/Error.h" -#include "flow/IRandom.h" -#include "flow/flow.h" -#include "flow/actorcompiler.h" // This must be the last #include. - -const KeyRef prefix = "prefix"_sr; -const KeyRef RECORD = "RECORD"_sr; -const KeyRef INDEX = "INDEX"_sr; - -struct IndexPrefetchDemoWorkload : TestWorkload { - bool enabled; - - IndexPrefetchDemoWorkload(WorkloadContext const& wcx) : TestWorkload(wcx) { - enabled = !clientId; // only do this on the "first" client - } - - std::string description() const override { return "IndexPrefetchDemo"; } - - Future start(Database const& cx) override { - if (enabled) { - return _start(cx, this); - } - return Void(); - } - - static KeyRef primaryKey(int i) { return KeyRef("primary-key-of-record-" + std::to_string(i)); } - static KeyRef indexKey(int i) { return KeyRef("index-key-of-record-" + std::to_string(i)); } - static KeyRef data(int i) { return KeyRef("data-of-record-" + std::to_string(i)); } - - ACTOR Future fillInRecords(Database cx, int n) { - std::cout << "start fillInRecords n=" << n << std::endl; - // TODO: When n is large, split into multiple transactions. 
- state Transaction tr(cx); - try { - tr.reset(); - for (int i = 0; i < n; i++) { - tr.set(Tuple().append(prefix).append(RECORD).append(primaryKey(i)).pack(), - Tuple().append(data(i)).pack()); - tr.set(Tuple().append(prefix).append(INDEX).append(indexKey(i)).append(primaryKey(i)).pack(), - Tuple().pack()); - } - wait(tr.commit()); - std::cout << "finished fillInRecords" << std::endl; - } catch (Error& e) { - std::cout << "failed fillInRecords" << std::endl; - wait(tr.onError(e)); - } - return Void(); - } - - static void showResult(const RangeResult& result) { - std::cout << "result size: " << result.size() << std::endl; - const KeyValueRef* it = result.begin(); - for (; it != result.end(); it++) { - std::cout << "key=" << it->key.printable() << ", value=" << it->value.printable() << std::endl; - } - } - - ACTOR Future scanRange(Database cx, KeyRangeRef range) { - std::cout << "start scanRange " << range.toString() << std::endl; - // TODO: When n is large, split into multiple transactions. - state Transaction tr(cx); - try { - tr.reset(); - RangeResult result = wait(tr.getRange(range, CLIENT_KNOBS->TOO_MANY)); - showResult(result); - } catch (Error& e) { - wait(tr.onError(e)); - } - std::cout << "finished scanRange" << std::endl; - return Void(); - } - - ACTOR Future scanRangeAndFlatMap(Database cx, KeyRange range, Key mapper) { - std::cout << "start scanRangeAndFlatMap " << range.toString() << std::endl; - // TODO: When n is large, split into multiple transactions. 
- state Transaction tr(cx); - try { - tr.reset(); - RangeResult result = - wait(tr.getRangeAndFlatMap(KeySelector(firstGreaterOrEqual(range.begin), range.arena()), - KeySelector(firstGreaterOrEqual(range.end), range.arena()), - mapper, - GetRangeLimits(CLIENT_KNOBS->TOO_MANY))); - showResult(result); - // result size: 2 - // key=\x01prefix\x00\x01RECORD\x00\x01primary-key-of-record-2\x00, value=\x01data-of-record-2\x00 - // key=\x01prefix\x00\x01RECORD\x00\x01primary-key-of-record-3\x00, value=\x01data-of-record-3\x00 - } catch (Error& e) { - wait(tr.onError(e)); - } - std::cout << "finished scanRangeAndFlatMap" << std::endl; - return Void(); - } - - ACTOR Future _start(Database cx, IndexPrefetchDemoWorkload* self) { - // TODO: Use toml to config - wait(self->fillInRecords(cx, 5)); - - wait(self->scanRange(cx, normalKeys)); - - Key someIndexesBegin = Tuple().append(prefix).append(INDEX).append(indexKey(2)).getDataAsStandalone(); - Key someIndexesEnd = Tuple().append(prefix).append(INDEX).append(indexKey(4)).getDataAsStandalone(); - state KeyRange someIndexes = KeyRangeRef(someIndexesBegin, someIndexesEnd); - wait(self->scanRange(cx, someIndexes)); - - Tuple mapperTuple; - mapperTuple << prefix << RECORD << "{K[3]}"_sr; - Key mapper = mapperTuple.getDataAsStandalone(); - wait(self->scanRangeAndFlatMap(cx, someIndexes, mapper)); - return Void(); - } - - Future check(Database const& cx) override { return true; } - - void getMetrics(std::vector& m) override {} -}; - -WorkloadFactory IndexPrefetchDemoWorkloadFactory("IndexPrefetchDemo"); diff --git a/flow/Platform.h b/flow/Platform.h index 889d2a0b17d..e07bc1a3329 100644 --- a/flow/Platform.h +++ b/flow/Platform.h @@ -613,7 +613,6 @@ inline static void flushOutputStreams() { #if defined(_MSC_VER) #define DLLEXPORT __declspec(dllexport) #elif defined(__GNUG__) -#undef DLLEXPORT #define DLLEXPORT __attribute__((visibility("default"))) #else #error Missing symbol export diff --git a/flow/error_definitions.h 
b/flow/error_definitions.h index 5e815c57987..e468e46801c 100755 --- a/flow/error_definitions.h +++ b/flow/error_definitions.h @@ -159,12 +159,6 @@ ERROR( blocked_from_network_thread, 2026, "Detected a deadlock in a callback cal ERROR( invalid_config_db_range_read, 2027, "Invalid configuration database range read" ) ERROR( invalid_config_db_key, 2028, "Invalid configuration database key provided" ) ERROR( invalid_config_path, 2029, "Invalid configuration path" ) -ERROR( mapper_bad_index, 2030, "The index in K[] or V[] is not a valid number or out of range" ) -ERROR( mapper_no_such_key, 2031, "A mapped key is not set in database" ) -ERROR( mapper_bad_range_decriptor, 2032, "\"{...}\" must be the last element of the mapper tuple" ) -ERROR( quick_get_key_values_has_more, 2033, "One of the mapped range queries is too large" ) -ERROR( quick_get_value_miss, 2034, "Found a mapped key that is not served in the same SS" ) -ERROR( quick_get_key_values_miss, 2035, "Found a mapped range that is not served in the same SS" ) ERROR( incompatible_protocol_version, 2100, "Incompatible protocol version" ) ERROR( transaction_too_large, 2101, "Transaction exceeds byte limit" ) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e5f52a2de3d..22c77e091d9 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -150,7 +150,6 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES fast/MemoryLifetime.toml) add_fdb_test(TEST_FILES fast/MoveKeysCycle.toml) add_fdb_test(TEST_FILES fast/MutationLogReaderCorrectness.toml) - add_fdb_test(TEST_FILES fast/IndexPrefetchDemo.toml) add_fdb_test(TEST_FILES fast/ProtocolVersion.toml) add_fdb_test(TEST_FILES fast/RandomSelector.toml) add_fdb_test(TEST_FILES fast/RandomUnitTests.toml) diff --git a/tests/fast/IndexPrefetchDemo.toml b/tests/fast/IndexPrefetchDemo.toml deleted file mode 100644 index dbdca31f8d5..00000000000 --- a/tests/fast/IndexPrefetchDemo.toml +++ /dev/null @@ -1,6 +0,0 @@ -[[test]] -testTitle = 'IndexPrefetchDemo' -useDB = true - 
- [[test.workload]] - testName = 'IndexPrefetchDemo' From f15ceb548961b5eb47eb55edf915c3748fba2c29 Mon Sep 17 00:00:00 2001 From: Renxuan Wang Date: Tue, 2 Nov 2021 22:59:28 -0700 Subject: [PATCH 53/69] Add Hostname struct, and fromHostname in NetworkAddress struct. --- fdbclient/MonitorLeader.actor.cpp | 13 +++- fdbrpc/FlowTransport.actor.cpp | 7 +- flow/ProtocolVersion.h | 1 + flow/network.cpp | 103 ++++++++++++++++++++++++++++-- flow/network.h | 63 +++++++++++++++--- 5 files changed, 167 insertions(+), 20 deletions(-) diff --git a/fdbclient/MonitorLeader.actor.cpp b/fdbclient/MonitorLeader.actor.cpp index 731669003aa..ba99a196b38 100644 --- a/fdbclient/MonitorLeader.actor.cpp +++ b/fdbclient/MonitorLeader.actor.cpp @@ -48,6 +48,14 @@ std::string trim(std::string const& connectionString) { return trimmed; } +std::string trimFromHostname(std::string const& networkAddress) { + std::string result = networkAddress; + if (result.find("(fromHostname)") != std::string::npos) { + return result.substr(0, result.find("(fromHostname)")); + } + return result; +} + } // namespace FDB_DEFINE_BOOLEAN_PARAM(ConnectionStringNeedsPersisted); @@ -269,9 +277,10 @@ std::string ClusterConnectionString::toString() const { std::string s = key.toString(); s += '@'; for (int i = 0; i < coord.size(); i++) { - if (i) + if (i) { s += ','; - s += coord[i].toString(); + } + s += trimFromHostname(coord[i].toString()); } return s; } diff --git a/fdbrpc/FlowTransport.actor.cpp b/fdbrpc/FlowTransport.actor.cpp index 4f5d5e2d1b8..7c58bc1c7fd 100644 --- a/fdbrpc/FlowTransport.actor.cpp +++ b/fdbrpc/FlowTransport.actor.cpp @@ -1262,8 +1262,11 @@ ACTOR static Future connectionReader(TransportData* transport, } else { peerProtocolVersion = protocolVersion; if (pkt.canonicalRemotePort) { - peerAddress = NetworkAddress( - pkt.canonicalRemoteIp(), pkt.canonicalRemotePort, true, peerAddress.isTLS()); + peerAddress = NetworkAddress(pkt.canonicalRemoteIp(), + pkt.canonicalRemotePort, + true, + 
peerAddress.isTLS(), + peerAddress.fromHostname); } peer = transport->getOrOpenPeer(peerAddress, false); peer->compatible = compatible; diff --git a/flow/ProtocolVersion.h b/flow/ProtocolVersion.h index d7e4693f1ab..ae6d0da494b 100644 --- a/flow/ProtocolVersion.h +++ b/flow/ProtocolVersion.h @@ -141,6 +141,7 @@ class ProtocolVersion { PROTOCOL_VERSION_FEATURE(0x0FDB00B070010001LL, TSS); PROTOCOL_VERSION_FEATURE(0x0FDB00B070010001LL, ChangeFeed); // FIXME: Change to 7.1 once we cut release PROTOCOL_VERSION_FEATURE(0x0FDB00B070010001LL, BlobGranule); // FIXME: Change to 7.1 once we cut release + PROTOCOL_VERSION_FEATURE(0x0FDB00B070010001LL, NetworkAddressHostnameFlag); }; template <> diff --git a/flow/network.cpp b/flow/network.cpp index f50bbc60608..6ed0616c43b 100644 --- a/flow/network.cpp +++ b/flow/network.cpp @@ -63,7 +63,7 @@ bool IPAddress::isValid() const { return std::get(addr) != 0; } -NetworkAddress NetworkAddress::parse(std::string const& s) { +Hostname Hostname::parse(std::string const& s) { if (s.empty()) { throw connection_string_invalid(); } @@ -76,6 +76,28 @@ NetworkAddress NetworkAddress::parse(std::string const& s) { } else { f = s; } + auto colonPos = f.find_first_of(":"); + return Hostname(f.substr(0, colonPos), f.substr(colonPos + 1), isTLS); +} + +FDB_DEFINE_BOOLEAN_PARAM(NetworkAddressFromHostname); + +NetworkAddress NetworkAddress::parse(std::string const& s) { + if (s.empty()) { + throw connection_string_invalid(); + } + + bool isTLS = false; + NetworkAddressFromHostname fromHostname = NetworkAddressFromHostname::False; + std::string f = s; + if (f.find("(fromHostname)") != std::string::npos) { + fromHostname = NetworkAddressFromHostname::True; + f = f.substr(0, f.find("(fromHostname)")); + } + if (f.size() > 4 && strcmp(f.c_str() + f.size() - 4, ":tls") == 0) { + isTLS = true; + f = f.substr(0, f.size() - 4); + } if (f[0] == '[') { // IPv6 address/port pair is represented as "[ip]:port" @@ -89,13 +111,13 @@ NetworkAddress 
NetworkAddress::parse(std::string const& s) { if (!addr.present()) { throw connection_string_invalid(); } - return NetworkAddress(addr.get(), port, true, isTLS); + return NetworkAddress(addr.get(), port, true, isTLS, fromHostname); } else { // TODO: Use IPAddress::parse int a, b, c, d, port, count = -1; if (sscanf(f.c_str(), "%d.%d.%d.%d:%d%n", &a, &b, &c, &d, &port, &count) < 5 || count != f.size()) throw connection_string_invalid(); - return NetworkAddress((a << 24) + (b << 16) + (c << 8) + d, port, true, isTLS); + return NetworkAddress((a << 24) + (b << 16) + (c << 8) + d, port, true, isTLS, fromHostname); } } @@ -123,7 +145,11 @@ std::vector NetworkAddress::parseList(std::string const& addrs) } std::string NetworkAddress::toString() const { - return formatIpPort(ip, port) + (isTLS() ? ":tls" : ""); + std::string ipString = formatIpPort(ip, port) + (isTLS() ? ":tls" : ""); + if (fromHostname) { + return ipString + "(fromHostname)"; + } + return ipString; } std::string toIPVectorString(const std::vector& ips) { @@ -158,8 +184,10 @@ Future> INetworkConnections::connect(const std::string& h Future pickEndpoint = map(resolveTCPEndpoint(host, service), [=](std::vector const& addresses) -> NetworkAddress { NetworkAddress addr = addresses[deterministicRandom()->randomInt(0, addresses.size())]; - if (useTLS) + addr.fromHostname = NetworkAddressFromHostname::True; + if (useTLS) { addr.flags = NetworkAddress::FLAG_TLS; + } return addr; }); @@ -185,15 +213,18 @@ TEST_CASE("/flow/network/ipaddress") { auto addrCompressed = "[2001:db8:85a3::8a2e:370:7334]:4800"; ASSERT(addrParsed.isV6()); ASSERT(!addrParsed.isTLS()); + ASSERT(addrParsed.fromHostname == NetworkAddressFromHostname::False); + ASSERT(addrParsed.toString() == addrCompressed); ASSERT(addrParsed.toString() == addrCompressed); } { - auto addr = "[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:4800:tls"; + auto addr = "[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:4800:tls(fromHostname)"; auto addrParsed = 
NetworkAddress::parse(addr); - auto addrCompressed = "[2001:db8:85a3::8a2e:370:7334]:4800:tls"; + auto addrCompressed = "[2001:db8:85a3::8a2e:370:7334]:4800:tls(fromHostname)"; ASSERT(addrParsed.isV6()); ASSERT(addrParsed.isTLS()); + ASSERT(addrParsed.fromHostname == NetworkAddressFromHostname::True); ASSERT(addrParsed.toString() == addrCompressed); } @@ -220,4 +251,62 @@ TEST_CASE("/flow/network/ipaddress") { return Void(); } +TEST_CASE("/flow/network/hostname") { + std::string hn1s = "localhost:1234"; + std::string hn2s = "host-name:1234"; + std::string hn3s = "host.name:1234"; + std::string hn4s = "host-name_part1.host-name_part2:1234:tls"; + + std::string hn5s = "127.0.0.1:1234"; + std::string hn6s = "127.0.0.1:1234:tls"; + std::string hn7s = "[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:4800"; + std::string hn8s = "[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:4800:tls"; + std::string hn9s = "2001:0db8:85a3:0000:0000:8a2e:0370:7334"; + std::string hn10s = "2001:0db8:85a3:0000:0000:8a2e:0370:7334:tls"; + std::string hn11s = "[::1]:4800"; + std::string hn12s = "[::1]:4800:tls"; + std::string hn13s = "1234"; + + auto hn1 = Hostname::parse(hn1s); + ASSERT(hn1.toString() == hn1s); + ASSERT(hn1.host == "localhost"); + ASSERT(hn1.service == "1234"); + ASSERT(!hn1.useTLS); + + auto hn2 = Hostname::parse(hn2s); + ASSERT(hn2.toString() == hn2s); + ASSERT(hn2.host == "host-name"); + ASSERT(hn2.service == "1234"); + ASSERT(!hn2.useTLS); + + auto hn3 = Hostname::parse(hn3s); + ASSERT(hn3.toString() == hn3s); + ASSERT(hn3.host == "host.name"); + ASSERT(hn3.service == "1234"); + ASSERT(!hn3.useTLS); + + auto hn4 = Hostname::parse(hn4s); + ASSERT(hn4.toString() == hn4s); + ASSERT(hn4.host == "host-name_part1.host-name_part2"); + ASSERT(hn4.service == "1234"); + ASSERT(hn4.useTLS); + + ASSERT(Hostname::isHostname(hn1s)); + ASSERT(Hostname::isHostname(hn2s)); + ASSERT(Hostname::isHostname(hn3s)); + ASSERT(Hostname::isHostname(hn4s)); + + ASSERT(!Hostname::isHostname(hn5s)); + 
ASSERT(!Hostname::isHostname(hn6s)); + ASSERT(!Hostname::isHostname(hn7s)); + ASSERT(!Hostname::isHostname(hn8s)); + ASSERT(!Hostname::isHostname(hn9s)); + ASSERT(!Hostname::isHostname(hn10s)); + ASSERT(!Hostname::isHostname(hn11s)); + ASSERT(!Hostname::isHostname(hn12s)); + ASSERT(!Hostname::isHostname(hn13s)); + + return Void(); +} + NetworkInfo::NetworkInfo() : handshakeLock(new FlowLock(FLOW_KNOBS->TLS_HANDSHAKE_LIMIT)) {} diff --git a/flow/network.h b/flow/network.h index a0710f5ce4c..f693aa491d8 100644 --- a/flow/network.h +++ b/flow/network.h @@ -24,6 +24,7 @@ #pragma once #include +#include #include #include #include @@ -33,6 +34,7 @@ #include "boost/asio/ssl.hpp" #endif #include "flow/Arena.h" +#include "flow/BooleanParam.h" #include "flow/IRandom.h" #include "flow/Trace.h" #include "flow/WriteOnlySet.h" @@ -132,6 +134,29 @@ inline TaskPriority incrementPriorityIfEven(TaskPriority p) { class Void; +struct Hostname { + std::string host; + std::string service; // decimal port number + bool useTLS; + + Hostname(std::string host, std::string service, bool useTLS) : host(host), service(service), useTLS(useTLS) {} + + // Allow hostnames in forms like following: + // hostname:1234 + // host.name:1234 + // host-name:1234 + // host-name_part1.host-name_part2:1234:tls + static bool isHostname(std::string& s) { + std::regex validation("^([\\w\\-]+\\.?)+:([\\d]+){1,}(:tls)?$"); + std::regex ipv4Validation("^([\\d]{1,3}\\.?){4,}:([\\d]+){1,}(:tls)?$"); + return !std::regex_match(s, ipv4Validation) && std::regex_match(s, validation); + } + + static Hostname parse(std::string const& str); + + std::string toString() const { return host + ":" + service + (useTLS ? 
":tls" : ""); } +}; + struct IPAddress { typedef boost::asio::ip::address_v6::bytes_type IPAddressStore; static_assert(std::is_same>::value, @@ -201,23 +226,38 @@ struct Traceable : std::true_type { static std::string toString(const IPAddress& value) { return value.toString(); } }; +FDB_DECLARE_BOOLEAN_PARAM(NetworkAddressFromHostname); + struct NetworkAddress { constexpr static FileIdentifier file_identifier = 14155727; // A NetworkAddress identifies a particular running server (i.e. a TCP endpoint). IPAddress ip; uint16_t port; uint16_t flags; + NetworkAddressFromHostname fromHostname; enum { FLAG_PRIVATE = 1, FLAG_TLS = 2 }; - NetworkAddress() : ip(IPAddress(0)), port(0), flags(FLAG_PRIVATE) {} - NetworkAddress(const IPAddress& address, uint16_t port, bool isPublic, bool isTLS) - : ip(address), port(port), flags((isPublic ? 0 : FLAG_PRIVATE) | (isTLS ? FLAG_TLS : 0)) {} - NetworkAddress(uint32_t ip, uint16_t port, bool isPublic, bool isTLS) - : NetworkAddress(IPAddress(ip), port, isPublic, isTLS) {} - - NetworkAddress(uint32_t ip, uint16_t port) : NetworkAddress(ip, port, false, false) {} - NetworkAddress(const IPAddress& ip, uint16_t port) : NetworkAddress(ip, port, false, false) {} + NetworkAddress() + : ip(IPAddress(0)), port(0), flags(FLAG_PRIVATE), fromHostname(NetworkAddressFromHostname::False) {} + NetworkAddress(const IPAddress& address, + uint16_t port, + bool isPublic, + bool isTLS, + NetworkAddressFromHostname fromHostname = NetworkAddressFromHostname::False) + : ip(address), port(port), flags((isPublic ? 0 : FLAG_PRIVATE) | (isTLS ? 
FLAG_TLS : 0)), + fromHostname(fromHostname) {} + NetworkAddress(uint32_t ip, + uint16_t port, + bool isPublic, + bool isTLS, + NetworkAddressFromHostname fromHostname = NetworkAddressFromHostname::False) + : NetworkAddress(IPAddress(ip), port, isPublic, isTLS, fromHostname) {} + + NetworkAddress(uint32_t ip, uint16_t port) + : NetworkAddress(ip, port, false, false, NetworkAddressFromHostname::False) {} + NetworkAddress(const IPAddress& ip, uint16_t port) + : NetworkAddress(ip, port, false, false, NetworkAddressFromHostname::False) {} bool operator==(NetworkAddress const& r) const { return ip == r.ip && port == r.port && flags == r.flags; } bool operator!=(NetworkAddress const& r) const { return ip != r.ip || port != r.port || flags != r.flags; } @@ -256,7 +296,8 @@ struct NetworkAddress { template void serialize(Ar& ar) { if constexpr (is_fb_function) { - serializer(ar, ip, port, flags); + bool fromHN = fromHostname == NetworkAddressFromHostname::True; + serializer(ar, ip, port, flags, fromHN); } else { if (ar.isDeserializing && !ar.protocolVersion().hasIPv6()) { uint32_t ipV4; @@ -265,6 +306,10 @@ struct NetworkAddress { } else { serializer(ar, ip, port, flags); } + if (ar.protocolVersion().hasNetworkAddressHostnameFlag()) { + bool fromHN = fromHostname == NetworkAddressFromHostname::True; + serializer(ar, fromHN); + } } } }; From 85dff214a47b9e2aca6109ab5c008aad1df72f10 Mon Sep 17 00:00:00 2001 From: Renxuan Wang Date: Thu, 4 Nov 2021 10:48:49 -0700 Subject: [PATCH 54/69] Address comments. 
--- fdbclient/MonitorLeader.actor.cpp | 8 ++++---- flow/network.cpp | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fdbclient/MonitorLeader.actor.cpp b/fdbclient/MonitorLeader.actor.cpp index ba99a196b38..07327d91074 100644 --- a/fdbclient/MonitorLeader.actor.cpp +++ b/fdbclient/MonitorLeader.actor.cpp @@ -49,11 +49,11 @@ std::string trim(std::string const& connectionString) { } std::string trimFromHostname(std::string const& networkAddress) { - std::string result = networkAddress; - if (result.find("(fromHostname)") != std::string::npos) { - return result.substr(0, result.find("(fromHostname)")); + const auto& pos = networkAddress.find("(fromHostname)"); + if (pos != std::string::npos) { + return networkAddress.substr(0, pos); } - return result; + return networkAddress; } } // namespace diff --git a/flow/network.cpp b/flow/network.cpp index 6ed0616c43b..68fa7e4a265 100644 --- a/flow/network.cpp +++ b/flow/network.cpp @@ -90,9 +90,10 @@ NetworkAddress NetworkAddress::parse(std::string const& s) { bool isTLS = false; NetworkAddressFromHostname fromHostname = NetworkAddressFromHostname::False; std::string f = s; - if (f.find("(fromHostname)") != std::string::npos) { + const auto& pos = f.find("(fromHostname)"); + if (pos != std::string::npos) { fromHostname = NetworkAddressFromHostname::True; - f = f.substr(0, f.find("(fromHostname)")); + f = f.substr(0, pos); } if (f.size() > 4 && strcmp(f.c_str() + f.size() - 4, ":tls") == 0) { isTLS = true; From e08de9e3040f833ea3b670852040f90bd4a50565 Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Thu, 4 Nov 2021 12:07:59 -0700 Subject: [PATCH 55/69] Update transaction-tagging.rst documentation --- documentation/sphinx/source/transaction-tagging.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/documentation/sphinx/source/transaction-tagging.rst b/documentation/sphinx/source/transaction-tagging.rst index 9b4bef2ea35..e0e89bf347a 100644 --- 
a/documentation/sphinx/source/transaction-tagging.rst +++ b/documentation/sphinx/source/transaction-tagging.rst @@ -13,14 +13,14 @@ Adding tags to transactions Tags are added to transaction by using transaction options. Each transaction can include up to five tags, and each tag must not exceed 16 characters. There are two options that can be used to add tags: -* ``TAG`` - Adds a tag to the transaction. This tag will not be used for auto-throttling and is not included with read requests. Tags set in this way can only be throttled manually. -* ``AUTO_THROTTLE_TAG`` - Adds a tag to the transaction that can be both automatically and manually throttled. To support busy tag detection, these tags may be sent as part of read requests. +* ``TAG`` - Adds a tag to the transaction. This tag will not be used for auto-throttling and is not included with read or commit requests. Tags set in this way can only be throttled manually. +* ``AUTO_THROTTLE_TAG`` - Adds a tag to the transaction that can be both automatically and manually throttled. To support busy tag detection, these tags may be sent as part of read or commit requests. See the documentation for your particular language binding for details about setting this option. .. note:: If setting hierarchical tags, it is recommended that you not use auto-throttle tags at multiple levels of the hierarchy. Otherwise, the cluster will favor throttling those tags set at higher levels, as they will include more transactions. -.. note:: Tags must be included as part of all get read version requests, and a sample of read requests will include auto-throttled tags. Additionally, each tag imposes additional costs with those requests. It is therefore recommended that you not use excessive numbers or lengths of transaction tags. +.. note:: Tags must be included as part of all get read version requests, and a sample of read and commit requests will include auto-throttled tags. 
Additionally, each tag imposes additional costs with those requests. It is therefore recommended that you not use excessive numbers or lengths of transaction tags. Tag throttling overview ======================= @@ -48,7 +48,7 @@ When a transaction tag is throttled, this information will be communicated to th Automatic transaction tag throttling ==================================== -When using the ``AUTO_THROTTLE_TAG`` transaction option, the cluster will monitor read activity for the chosen tags and may choose to reduce a tag's transaction rate limit if a storage server gets busy enough and has a sufficient portion of its read traffic coming from that one tag. +When using the ``AUTO_THROTTLE_TAG`` transaction option, the cluster will monitor activity for the chosen tags and may choose to reduce a tag's transaction rate limit if a storage server gets busy enough and has a sufficient portion of its traffic coming from that one tag. When a tag is auto-throttled, the default priority transaction rate will be decreased to reduce the percentage of traffic attributable to that tag to a reasonable amount of total traffic on the affected storage server(s), and batch priority transactions for that tag will be stopped completely. From e4ca7e9511963508e54f6aaab8020c165cc29f32 Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Sun, 31 Oct 2021 17:40:37 -0700 Subject: [PATCH 56/69] Fix comment for RequestStream::tryGetReply --- fdbrpc/fdbrpc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fdbrpc/fdbrpc.h b/fdbrpc/fdbrpc.h index fd70b8520dc..7a4c07f9104 100644 --- a/fdbrpc/fdbrpc.h +++ b/fdbrpc/fdbrpc.h @@ -662,8 +662,8 @@ class RequestStream { } // stream.tryGetReply( request ) - // Unreliable at most once delivery: Either delivers request and returns a reply, or returns failure - // (Optional()) eventually. If a reply is returned, request was delivered exactly once. 
If cancelled or returns + // Unreliable at most once delivery: Either delivers request and returns a reply, or returns an error eventually. + // If a reply is returned, request was delivered exactly once. If cancelled or returns // failure, request was or will be delivered zero or one times. The caller must be capable of retrying if this // request returns failure template From 30cef5174625a84477ec7edd07039cebf5854938 Mon Sep 17 00:00:00 2001 From: sfc-gh-tclinkenbeard Date: Sun, 31 Oct 2021 20:24:29 -0700 Subject: [PATCH 57/69] Improve tracing in ddSnapCreateCore --- fdbserver/DataDistribution.actor.cpp | 44 ++++++++++++++++------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/fdbserver/DataDistribution.actor.cpp b/fdbserver/DataDistribution.actor.cpp index 21697dedc1d..84f13137a75 100644 --- a/fdbserver/DataDistribution.actor.cpp +++ b/fdbserver/DataDistribution.actor.cpp @@ -6374,6 +6374,19 @@ static std::set const& normalDataDistributorErrors() { return s; } +ACTOR template +Future sendSnapReq(RequestStream stream, Req req, Error e) { + ErrorOr reply = wait(stream.tryGetReply(req)); + if (reply.isError()) { + TraceEvent("SnapDataDistributor_ReqError") + .error(reply.getError(), true) + .detail("ConvertedErrorType", e.what()) + .detail("Peer", stream.getEndpoint().getPrimaryAddress()); + throw e; + } + return Void(); +} + ACTOR Future ddSnapCreateCore(DistributorSnapRequest snapReq, Reference const> db) { state Database cx = openDBOnServer(db, TaskPriority::DefaultDelay, LockAware::True); state ReadYourWritesTransaction tr(cx); @@ -6401,9 +6414,8 @@ ACTOR Future ddSnapCreateCore(DistributorSnapRequest snapReq, Reference> disablePops; disablePops.reserve(tlogs.size()); for (const auto& tlog : tlogs) { - disablePops.push_back(transformErrors( - throwErrorOr(tlog.disablePopRequest.tryGetReply(TLogDisablePopRequest(snapReq.snapUID))), - snap_disable_tlog_pop_failed())); + disablePops.push_back(sendSnapReq( + tlog.disablePopRequest, 
TLogDisablePopRequest{ snapReq.snapUID }, snap_disable_tlog_pop_failed())); } wait(waitForAll(disablePops)); @@ -6419,10 +6431,9 @@ ACTOR Future ddSnapCreateCore(DistributorSnapRequest snapReq, Reference> storageSnapReqs; storageSnapReqs.reserve(storageWorkers.size()); for (const auto& worker : storageWorkers) { - storageSnapReqs.push_back( - transformErrors(throwErrorOr(worker.workerSnapReq.tryGetReply(WorkerSnapRequest( - snapReq.snapPayload, snapReq.snapUID, LiteralStringRef("storage")))), - snap_storage_failed())); + storageSnapReqs.push_back(sendSnapReq(worker.workerSnapReq, + WorkerSnapRequest(snapReq.snapPayload, snapReq.snapUID, "storage"_sr), + snap_storage_failed())); } wait(waitForAll(storageSnapReqs)); @@ -6433,10 +6444,9 @@ ACTOR Future ddSnapCreateCore(DistributorSnapRequest snapReq, Reference> tLogSnapReqs; tLogSnapReqs.reserve(tlogs.size()); for (const auto& tlog : tlogs) { - tLogSnapReqs.push_back( - transformErrors(throwErrorOr(tlog.snapRequest.tryGetReply( - TLogSnapRequest(snapReq.snapPayload, snapReq.snapUID, LiteralStringRef("tlog")))), - snap_tlog_failed())); + tLogSnapReqs.push_back(sendSnapReq(tlog.snapRequest, + TLogSnapRequest{ snapReq.snapPayload, snapReq.snapUID, "tlog"_sr }, + snap_tlog_failed())); } wait(waitForAll(tLogSnapReqs)); @@ -6447,9 +6457,8 @@ ACTOR Future ddSnapCreateCore(DistributorSnapRequest snapReq, Reference> enablePops; enablePops.reserve(tlogs.size()); for (const auto& tlog : tlogs) { - enablePops.push_back( - transformErrors(throwErrorOr(tlog.enablePopRequest.tryGetReply(TLogEnablePopRequest(snapReq.snapUID))), - snap_enable_tlog_pop_failed())); + enablePops.push_back(sendSnapReq( + tlog.enablePopRequest, TLogEnablePopRequest{ snapReq.snapUID }, snap_enable_tlog_pop_failed())); } wait(waitForAll(enablePops)); @@ -6464,10 +6473,9 @@ ACTOR Future ddSnapCreateCore(DistributorSnapRequest snapReq, Reference> coordSnapReqs; coordSnapReqs.reserve(coordWorkers.size()); for (const auto& worker : coordWorkers) { - 
coordSnapReqs.push_back( - transformErrors(throwErrorOr(worker.workerSnapReq.tryGetReply(WorkerSnapRequest( - snapReq.snapPayload, snapReq.snapUID, LiteralStringRef("coord")))), - snap_coord_failed())); + coordSnapReqs.push_back(sendSnapReq(worker.workerSnapReq, + WorkerSnapRequest(snapReq.snapPayload, snapReq.snapUID, "coord"_sr), + snap_coord_failed())); } wait(waitForAll(coordSnapReqs)); TraceEvent("SnapDataDistributor_AfterSnapCoords") From 86ea63d1daea9ae45ebe4381b17549535848905a Mon Sep 17 00:00:00 2001 From: Yao Xiao <87789492+yao-xiao-github@users.noreply.github.com> Date: Thu, 4 Nov 2021 23:21:54 -0700 Subject: [PATCH 58/69] Add libsanitizer to fdb image. (#5918) --- packaging/docker/Dockerfile.eks | 1 + 1 file changed, 1 insertion(+) diff --git a/packaging/docker/Dockerfile.eks b/packaging/docker/Dockerfile.eks index b2aaf7b4f41..38fc61f56e9 100644 --- a/packaging/docker/Dockerfile.eks +++ b/packaging/docker/Dockerfile.eks @@ -25,6 +25,7 @@ RUN yum install -y \ gdb \ jq \ less \ + libsanitizer \ lsof \ nc \ net-tools \ From d97d9681766766836f991ec65f64b66b654b966c Mon Sep 17 00:00:00 2001 From: Steve Atherton Date: Mon, 8 Nov 2021 13:04:53 -0800 Subject: [PATCH 59/69] Added KeyBackedObjectMap and KeyBackedObjectProperty classes for storing serializable objects in FDB (#5896) * Cleaned up some lambda capture workaround since x=y captures weren't available when these classes were originally written. * Added KeyBackedObjectMap and KeyBackObjectProperty, which work like KeyBackedMap and KeyBackedProperty but use ObjectWriter/Reader for Value serialization so that the type can evolve over time. * Disabled unit tests which shouldn't run as part of random selection. 
--- fdbclient/KeyBackedTypes.h | 192 +++++++++++++++++++++++++---- fdbserver/BlobManager.actor.cpp | 2 +- fdbserver/VersionedBTree.actor.cpp | 2 +- 3 files changed, 172 insertions(+), 24 deletions(-) diff --git a/fdbclient/KeyBackedTypes.h b/fdbclient/KeyBackedTypes.h index fc73a737494..881415dce18 100644 --- a/fdbclient/KeyBackedTypes.h +++ b/fdbclient/KeyBackedTypes.h @@ -26,7 +26,9 @@ #include "fdbclient/IClientApi.h" #include "fdbclient/ReadYourWrites.h" #include "fdbclient/Subspace.h" +#include "flow/ObjectSerializer.h" #include "flow/genericactors.actor.h" +#include "flow/serialize.h" // Codec is a utility struct to convert a type to and from a Tuple. It is used by the template // classes below like KeyBackedProperty and KeyBackedMap to convert key parts and values @@ -168,14 +170,8 @@ class KeyBackedProperty { Future getOrThrow(Reference tr, Snapshot snapshot = Snapshot::False, Error err = key_not_found()) const { - auto keyCopy = key; - auto backtrace = platform::get_backtrace(); return map(get(tr, snapshot), [=](Optional val) -> T { if (!val.present()) { - TraceEvent(SevInfo, "KeyBackedProperty_KeyNotFound") - .detail("Key", keyCopy) - .detail("Err", err.code()) - .detail("ParentTrace", backtrace.c_str()); throw err; } @@ -184,45 +180,39 @@ class KeyBackedProperty { } Future> get(Database cx, Snapshot snapshot = Snapshot::False) const { - auto& copy = *this; - return runRYWTransaction(cx, [=](Reference tr) { + return runRYWTransaction(cx, [=, self = *this](Reference tr) { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); - return copy.get(tr, snapshot); + return self.get(tr, snapshot); }); } Future getD(Database cx, Snapshot snapshot = Snapshot::False, T defaultValue = T()) const { - auto& copy = *this; - return runRYWTransaction(cx, [=](Reference tr) { + return runRYWTransaction(cx, [=, self = *this](Reference tr) { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); 
tr->setOption(FDBTransactionOptions::LOCK_AWARE); - return copy.getD(tr, snapshot, defaultValue); + return self.getD(tr, snapshot, defaultValue); }); } Future getOrThrow(Database cx, Snapshot snapshot = Snapshot::False, Error err = key_not_found()) const { - auto& copy = *this; - return runRYWTransaction(cx, [=](Reference tr) { + return runRYWTransaction(cx, [=, self = *this](Reference tr) { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); - return copy.getOrThrow(tr, snapshot, err); + return self.getOrThrow(tr, snapshot, err); }); } void set(Reference tr, T const& val) { return tr->set(key, Codec::pack(val).pack()); } Future set(Database cx, T const& val) { - auto _key = key; - Value _val = Codec::pack(val).pack(); - return runRYWTransaction(cx, [_key, _val](Reference tr) { + return runRYWTransaction(cx, [=, self = *this](Reference tr) { tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); tr->setOption(FDBTransactionOptions::LOCK_AWARE); - tr->set(_key, _val); - + self->set(tr, val); return Future(Void()); }); } @@ -262,12 +252,12 @@ class KeyBackedBinaryValue { Key key; }; -// Convenient read/write access to a sorted map of KeyType to ValueType that has key as its prefix +// Convenient read/write access to a sorted map of KeyType to ValueType under prefix // Even though 'this' is not actually mutated, methods that change db keys are not const. template class KeyBackedMap { public: - KeyBackedMap(KeyRef key) : space(key) {} + KeyBackedMap(KeyRef prefix) : space(prefix) {} typedef _KeyType KeyType; typedef _ValueType ValueType; @@ -336,6 +326,164 @@ class KeyBackedMap { Subspace space; }; +// Convenient read/write access to a single value of type T stored at key +// Even though 'this' is not actually mutated, methods that change the db key are not const. 
+template +class KeyBackedObjectProperty { +public: + KeyBackedObjectProperty(KeyRef key, VersionOptions versionOptions) : key(key), versionOptions(versionOptions) {} + Future> get(Reference tr, Snapshot snapshot = Snapshot::False) const { + + return map(tr->get(key, snapshot), [vo = versionOptions](Optional const& val) -> Optional { + if (val.present()) + return ObjectReader::fromStringRef(val.get(), vo); + return {}; + }); + } + + // Get property's value or defaultValue if it doesn't exist + Future getD(Reference tr, + Snapshot snapshot = Snapshot::False, + T defaultValue = T()) const { + return map(get(tr, snapshot), [=](Optional val) -> T { return val.present() ? val.get() : defaultValue; }); + } + // Get property's value or throw error if it doesn't exist + Future getOrThrow(Reference tr, + Snapshot snapshot = Snapshot::False, + Error err = key_not_found()) const { + return map(get(tr, snapshot), [=](Optional val) -> T { + if (!val.present()) { + throw err; + } + + return val.get(); + }); + } + + Future> get(Database cx, Snapshot snapshot = Snapshot::False) const { + return runRYWTransaction(cx, [=, self = *this](Reference tr) { + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr->setOption(FDBTransactionOptions::LOCK_AWARE); + + return self.get(tr, snapshot); + }); + } + + Future getD(Database cx, Snapshot snapshot = Snapshot::False, T defaultValue = T()) const { + return runRYWTransaction(cx, [=, self = *this](Reference tr) { + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr->setOption(FDBTransactionOptions::LOCK_AWARE); + + return self.getD(tr, snapshot, defaultValue); + }); + } + + Future getOrThrow(Database cx, Snapshot snapshot = Snapshot::False, Error err = key_not_found()) const { + return runRYWTransaction(cx, [=, self = *this](Reference tr) { + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr->setOption(FDBTransactionOptions::LOCK_AWARE); + + return self.getOrThrow(tr, snapshot, err); + }); + } + + void 
set(Reference tr, T const& val) { + return tr->set(key, ObjectWriter::toValue(val, versionOptions)); + } + + Future set(Database cx, T const& val) { + return runRYWTransaction(cx, [=, self = *this](Reference tr) { + tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS); + tr->setOption(FDBTransactionOptions::LOCK_AWARE); + self.set(tr, val); + return Future(Void()); + }); + } + + void clear(Reference tr) { return tr->clear(key); } + + Key key; + VersionOptions versionOptions; +}; + +// Convenient read/write access to a sorted map of KeyType to ValueType under key prefix +// ValueType is encoded / decoded with ObjectWriter/ObjectReader +// Even though 'this' is not actually mutated, methods that change db keys are not const. +template +class KeyBackedObjectMap { +public: + KeyBackedObjectMap(KeyRef prefix, VersionOptions versionOptions) : space(prefix), versionOptions(versionOptions) {} + + typedef _KeyType KeyType; + typedef _ValueType ValueType; + typedef std::pair PairType; + typedef std::vector PairsType; + + // If end is not present one key past the end of the map is used. + Future getRange(Reference tr, + KeyType const& begin, + Optional const& end, + int limit, + Snapshot snapshot = Snapshot::False, + Reverse reverse = Reverse::False) const { + Key endKey = end.present() ? 
space.pack(Codec::pack(end.get())) : space.range().end; + return map( + tr->getRange( + KeyRangeRef(space.pack(Codec::pack(begin)), endKey), GetRangeLimits(limit), snapshot, reverse), + [self = *this](RangeResult const& kvs) -> PairsType { + PairsType results; + for (int i = 0; i < kvs.size(); ++i) { + KeyType key = Codec::unpack(self.space.unpack(kvs[i].key)); + ValueType val = ObjectReader::fromStringRef(kvs[i].value, self.versionOptions); + results.push_back(PairType(key, val)); + } + return results; + }); + } + + Future> get(Reference tr, + KeyType const& key, + Snapshot snapshot = Snapshot::False) const { + return map(tr->get(space.pack(Codec::pack(key)), snapshot), + [vo = versionOptions](Optional const& val) -> Optional { + if (val.present()) + return ObjectReader::fromStringRef(val.get(), vo); + return {}; + }); + } + + // Returns a Property that can be get/set that represents key's entry in this this. + KeyBackedObjectProperty getProperty(KeyType const& key) const { + return KeyBackedObjectProperty(space.pack(Codec::pack(key)), + versionOptions); + } + + // Returns the expectedSize of the set key + int set(Reference tr, KeyType const& key, ValueType const& val) { + Key k = space.pack(Codec::pack(key)); + Value v = ObjectWriter::toValue(val, versionOptions); + tr->set(k, v); + return k.expectedSize() + v.expectedSize(); + } + + void erase(Reference tr, KeyType const& key) { + return tr->clear(space.pack(Codec::pack(key))); + } + + void erase(Reference tr, KeyType const& key) { + return tr->clear(space.pack(Codec::pack(key))); + } + + void erase(Reference tr, KeyType const& begin, KeyType const& end) { + return tr->clear(KeyRangeRef(space.pack(Codec::pack(begin)), space.pack(Codec::pack(end)))); + } + + void clear(Reference tr) { return tr->clear(space.range()); } + + Subspace space; + VersionOptions versionOptions; +}; + template class KeyBackedSet { public: diff --git a/fdbserver/BlobManager.actor.cpp b/fdbserver/BlobManager.actor.cpp index 
557352f11c7..23d05225c8d 100644 --- a/fdbserver/BlobManager.actor.cpp +++ b/fdbserver/BlobManager.actor.cpp @@ -1169,7 +1169,7 @@ ACTOR Future blobManager(BlobManagerInterface bmInterf, // DB has [A - B) and [C - D). They should show up in knownBlobRanges, and [B - C) should be in removed. // DB has [B - C). It should show up in knownBlobRanges, [B - C) should be in added, and [A - B) and [C - D) should // be in removed. -TEST_CASE("/blobmanager/updateranges") { +TEST_CASE(":/blobmanager/updateranges") { KeyRangeMap knownBlobRanges(false, normalKeys.end); Arena ar; diff --git a/fdbserver/VersionedBTree.actor.cpp b/fdbserver/VersionedBTree.actor.cpp index 7c79052dd30..05abe0b7951 100644 --- a/fdbserver/VersionedBTree.actor.cpp +++ b/fdbserver/VersionedBTree.actor.cpp @@ -10254,7 +10254,7 @@ ACTOR Future randomRangeScans(IKeyValueStore* kvs, return Void(); } -TEST_CASE("!/redwood/performance/randomRangeScans") { +TEST_CASE(":/redwood/performance/randomRangeScans") { state int prefixLen = 30; state int suffixSize = 12; state int valueSize = 100; From 7df059570ac0c8a174cec438cb6f2d4edccaefa5 Mon Sep 17 00:00:00 2001 From: Markus Pilman Date: Mon, 8 Nov 2021 15:43:32 -0700 Subject: [PATCH 60/69] Make sure unit tests are run often enough --- fdbrpc/FlowTests.actor.cpp | 2 +- fdbserver/OldTLogServer_6_0.actor.cpp | 36 ------------------- fdbserver/OldTLogServer_6_2.actor.cpp | 36 ------------------- fdbserver/Status.actor.cpp | 2 +- fdbserver/TLogServer.actor.cpp | 2 +- fdbserver/VersionedBTree.actor.cpp | 8 ++--- tests/rare/AllSimUnitTests.toml | 9 +++++ tests/rare/RedwoodCorrectnessBTree.toml | 4 +-- tests/rare/RedwoodDeltaTree.toml | 16 +++++++++ tests/rare/StatusBuilderPerf.toml | 9 +++++ .../TLogVersionMessagesOverheadFactor.toml | 9 +++++ 11 files changed, 52 insertions(+), 81 deletions(-) create mode 100644 tests/rare/AllSimUnitTests.toml create mode 100644 tests/rare/RedwoodDeltaTree.toml create mode 100644 tests/rare/StatusBuilderPerf.toml create mode 100644 
tests/rare/TLogVersionMessagesOverheadFactor.toml diff --git a/fdbrpc/FlowTests.actor.cpp b/fdbrpc/FlowTests.actor.cpp index 65d3d36019f..e08963073fc 100644 --- a/fdbrpc/FlowTests.actor.cpp +++ b/fdbrpc/FlowTests.actor.cpp @@ -809,7 +809,7 @@ TEST_CASE("/flow/flow/chooseTwoActor") { return Void(); } -TEST_CASE("/flow/flow/perf/actor patterns") { +TEST_CASE("#flow/flow/perf/actor patterns") { double start; int N = 1000000; diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index de3fb42a5e8..d0ef9acbc60 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -2886,40 +2886,4 @@ struct DequeAllocator : std::allocator { } }; -TEST_CASE("/fdbserver/tlogserver/VersionMessagesOverheadFactor") { - - typedef std::pair TestType; // type used by versionMessages - - for (int i = 1; i < 9; ++i) { - for (int j = 0; j < 20; ++j) { - DequeAllocatorStats::allocatedBytes = 0; - DequeAllocator allocator; - std::deque> d(allocator); - - int numElements = deterministicRandom()->randomInt(pow(10, i - 1), pow(10, i)); - for (int k = 0; k < numElements; ++k) { - d.push_back(TestType()); - } - - int removedElements = 0; // deterministicRandom()->randomInt(0, numElements); // FIXME: the overhead factor - // does not accurately account for removal! 
- for (int k = 0; k < removedElements; ++k) { - d.pop_front(); - } - - int64_t dequeBytes = DequeAllocatorStats::allocatedBytes + sizeof(std::deque); - int64_t insertedBytes = (numElements - removedElements) * sizeof(TestType); - double overheadFactor = - std::max(insertedBytes, dequeBytes - 10000) / - insertedBytes; // We subtract 10K here as an estimated upper bound for the fixed cost of an std::deque - // fprintf(stderr, "%d elements (%d inserted, %d removed):\n", numElements-removedElements, numElements, - // removedElements); fprintf(stderr, "Allocated %lld bytes to store %lld bytes (%lf overhead factor)\n", - // dequeBytes, insertedBytes, overheadFactor); - ASSERT(overheadFactor * 1024 <= SERVER_KNOBS->VERSION_MESSAGES_OVERHEAD_FACTOR_1024THS); - } - } - - return Void(); -} - } // namespace oldTLog_6_0 diff --git a/fdbserver/OldTLogServer_6_2.actor.cpp b/fdbserver/OldTLogServer_6_2.actor.cpp index 4893a5da032..3cae30cf089 100644 --- a/fdbserver/OldTLogServer_6_2.actor.cpp +++ b/fdbserver/OldTLogServer_6_2.actor.cpp @@ -3374,40 +3374,4 @@ struct DequeAllocator : std::allocator { } }; -TEST_CASE("/fdbserver/tlogserver/VersionMessagesOverheadFactor") { - - typedef std::pair TestType; // type used by versionMessages - - for (int i = 1; i < 9; ++i) { - for (int j = 0; j < 20; ++j) { - DequeAllocatorStats::allocatedBytes = 0; - DequeAllocator allocator; - std::deque> d(allocator); - - int numElements = deterministicRandom()->randomInt(pow(10, i - 1), pow(10, i)); - for (int k = 0; k < numElements; ++k) { - d.push_back(TestType()); - } - - int removedElements = 0; // deterministicRandom()->randomInt(0, numElements); // FIXME: the overhead factor - // does not accurately account for removal! 
- for (int k = 0; k < removedElements; ++k) { - d.pop_front(); - } - - int64_t dequeBytes = DequeAllocatorStats::allocatedBytes + sizeof(std::deque); - int64_t insertedBytes = (numElements - removedElements) * sizeof(TestType); - double overheadFactor = - std::max(insertedBytes, dequeBytes - 10000) / - insertedBytes; // We subtract 10K here as an estimated upper bound for the fixed cost of an std::deque - // fprintf(stderr, "%d elements (%d inserted, %d removed):\n", numElements-removedElements, numElements, - // removedElements); fprintf(stderr, "Allocated %lld bytes to store %lld bytes (%lf overhead factor)\n", - // dequeBytes, insertedBytes, overheadFactor); - ASSERT(overheadFactor * 1024 <= SERVER_KNOBS->VERSION_MESSAGES_OVERHEAD_FACTOR_1024THS); - } - } - - return Void(); -} - } // namespace oldTLog_6_2 diff --git a/fdbserver/Status.actor.cpp b/fdbserver/Status.actor.cpp index 8176dac4bc4..64062231860 100644 --- a/fdbserver/Status.actor.cpp +++ b/fdbserver/Status.actor.cpp @@ -3308,7 +3308,7 @@ JsonBuilderObject randomDocument(const std::vector& strings, int& l return r; } -TEST_CASE("/status/json/builderPerf") { +TEST_CASE("Lstatus/json/builderPerf") { std::vector strings; int c = 1000000; printf("Generating random strings\n"); diff --git a/fdbserver/TLogServer.actor.cpp b/fdbserver/TLogServer.actor.cpp index c487fe15da6..e4083fd8cc9 100644 --- a/fdbserver/TLogServer.actor.cpp +++ b/fdbserver/TLogServer.actor.cpp @@ -3453,7 +3453,7 @@ struct DequeAllocator : std::allocator { } }; -TEST_CASE("/fdbserver/tlogserver/VersionMessagesOverheadFactor") { +TEST_CASE("Lfdbserver/tlogserver/VersionMessagesOverheadFactor") { typedef std::pair TestType; // type used by versionMessages diff --git a/fdbserver/VersionedBTree.actor.cpp b/fdbserver/VersionedBTree.actor.cpp index 7c79052dd30..005c9660bd9 100644 --- a/fdbserver/VersionedBTree.actor.cpp +++ b/fdbserver/VersionedBTree.actor.cpp @@ -8065,7 +8065,7 @@ TEST_CASE("/redwood/correctness/unit/RedwoodRecordRef") { return 
Void(); } -TEST_CASE("/redwood/correctness/unit/deltaTree/RedwoodRecordRef") { +TEST_CASE("Lredwood/correctness/unit/deltaTree/RedwoodRecordRef") { // Sanity check on delta tree node format ASSERT(DeltaTree2::Node::headerSize(false) == 4); ASSERT(DeltaTree2::Node::headerSize(true) == 8); @@ -8241,7 +8241,7 @@ TEST_CASE("/redwood/correctness/unit/deltaTree/RedwoodRecordRef") { return Void(); } -TEST_CASE("/redwood/correctness/unit/deltaTree/RedwoodRecordRef2") { +TEST_CASE("Lredwood/correctness/unit/deltaTree/RedwoodRecordRef2") { // Sanity check on delta tree node format ASSERT(DeltaTree2::Node::headerSize(false) == 4); ASSERT(DeltaTree2::Node::headerSize(true) == 8); @@ -8420,7 +8420,7 @@ TEST_CASE("/redwood/correctness/unit/deltaTree/RedwoodRecordRef2") { return Void(); } -TEST_CASE("/redwood/correctness/unit/deltaTree/IntIntPair") { +TEST_CASE("Lredwood/correctness/unit/deltaTree/IntIntPair") { const int N = 200; IntIntPair lowerBound = { 0, 0 }; IntIntPair upperBound = { 1000, 1000 }; @@ -9017,7 +9017,7 @@ TEST_CASE(":/redwood/pager/ArenaPage") { return Void(); } -TEST_CASE("/redwood/correctness/btree") { +TEST_CASE("Lredwood/correctness/btree") { g_redwoodMetricsActor = Void(); // Prevent trace event metrics from starting g_redwoodMetrics.clear(); diff --git a/tests/rare/AllSimUnitTests.toml b/tests/rare/AllSimUnitTests.toml new file mode 100644 index 00000000000..5509fd8b4b8 --- /dev/null +++ b/tests/rare/AllSimUnitTests.toml @@ -0,0 +1,9 @@ +[[test]] +testTitle = 'UnitTests' +useDB = false +startDelay = 0 + + [[test.workload]] + testName = 'UnitTests' + #maxTestCases = 1 + testsMatching = '/' \ No newline at end of file diff --git a/tests/rare/RedwoodCorrectnessBTree.toml b/tests/rare/RedwoodCorrectnessBTree.toml index c39098e4cc3..84378ab0336 100644 --- a/tests/rare/RedwoodCorrectnessBTree.toml +++ b/tests/rare/RedwoodCorrectnessBTree.toml @@ -5,5 +5,5 @@ startDelay = 0 [[test.workload]] testName = 'UnitTests' - maxTestCases = 0 - testsMatching = 
'/redwood/correctness/btree' + maxTestCases = 1 + testsMatching = 'Lredwood/correctness/btree' diff --git a/tests/rare/RedwoodDeltaTree.toml b/tests/rare/RedwoodDeltaTree.toml new file mode 100644 index 00000000000..0d78b01bf17 --- /dev/null +++ b/tests/rare/RedwoodDeltaTree.toml @@ -0,0 +1,16 @@ +[[test]] +testTitle = 'RedwoodDeltaTree' +useDB = false +startDelay = 0 + + [[test.workload]] + testName = 'UnitTests' + maxTestCases = 1 + testsMatching = 'Lredwood/correctness/unit/deltaTree/IntIntPair' + +[[test]] +testTitle = "RedwoodRecordRef" + + [[test.workload]] + testName = 'Lredwood/correctness/unit/deltaTree/RedwoodRecordRef' + maxTestCases = 2 # there are two of those \ No newline at end of file diff --git a/tests/rare/StatusBuilderPerf.toml b/tests/rare/StatusBuilderPerf.toml new file mode 100644 index 00000000000..2a9bbdda5d2 --- /dev/null +++ b/tests/rare/StatusBuilderPerf.toml @@ -0,0 +1,9 @@ +[[test]] +testTitle = 'StatusBuilderPerf' +useDB = false +startDelay = 0 + + [[test.workload]] + testName = 'UnitTests' + maxTestCases = 1 + testsMatching = 'Lstatus/json/builderPerf' \ No newline at end of file diff --git a/tests/rare/TLogVersionMessagesOverheadFactor.toml b/tests/rare/TLogVersionMessagesOverheadFactor.toml new file mode 100644 index 00000000000..231d3fbbf06 --- /dev/null +++ b/tests/rare/TLogVersionMessagesOverheadFactor.toml @@ -0,0 +1,9 @@ +[[test]] +testTitle = 'TLogVersionMessagesOverheadFactor' +useDB = false +startDelay = 0 + + [[test.workload]] + testName = 'UnitTests' + maxTestCases = 1 + testsMatching = 'Lfdbserver/tlogserver/VersionMessagesOverheadFactor' \ No newline at end of file From 648b9c97ab9488acfd27feb267b2c2423c3d4b7a Mon Sep 17 00:00:00 2001 From: Markus Pilman Date: Mon, 8 Nov 2021 15:47:32 -0700 Subject: [PATCH 61/69] fixed stupid mistake --- tests/rare/RedwoodDeltaTree.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/rare/RedwoodDeltaTree.toml b/tests/rare/RedwoodDeltaTree.toml index 
0d78b01bf17..ce79fb22891 100644 --- a/tests/rare/RedwoodDeltaTree.toml +++ b/tests/rare/RedwoodDeltaTree.toml @@ -12,5 +12,6 @@ startDelay = 0 testTitle = "RedwoodRecordRef" [[test.workload]] - testName = 'Lredwood/correctness/unit/deltaTree/RedwoodRecordRef' - maxTestCases = 2 # there are two of those \ No newline at end of file + testName = 'UnitTests' + maxTestCases = 2 # there are two of those + testsMatching = 'Lredwood/correctness/unit/deltaTree/RedwoodRecordRef' \ No newline at end of file From d6fad2e489907a1dd06b272020a41d9497152780 Mon Sep 17 00:00:00 2001 From: Markus Pilman Date: Mon, 8 Nov 2021 15:52:08 -0700 Subject: [PATCH 62/69] readded old tlog tests --- fdbserver/OldTLogServer_6_0.actor.cpp | 36 +++++++++++++++++++ fdbserver/OldTLogServer_6_2.actor.cpp | 36 +++++++++++++++++++ .../TLogVersionMessagesOverheadFactor.toml | 1 - 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/fdbserver/OldTLogServer_6_0.actor.cpp b/fdbserver/OldTLogServer_6_0.actor.cpp index d0ef9acbc60..2a566de76b6 100644 --- a/fdbserver/OldTLogServer_6_0.actor.cpp +++ b/fdbserver/OldTLogServer_6_0.actor.cpp @@ -2886,4 +2886,40 @@ struct DequeAllocator : std::allocator { } }; +TEST_CASE("Lfdbserver/tlogserver/VersionMessagesOverheadFactor") { + + typedef std::pair TestType; // type used by versionMessages + + for (int i = 1; i < 9; ++i) { + for (int j = 0; j < 20; ++j) { + DequeAllocatorStats::allocatedBytes = 0; + DequeAllocator allocator; + std::deque> d(allocator); + + int numElements = deterministicRandom()->randomInt(pow(10, i - 1), pow(10, i)); + for (int k = 0; k < numElements; ++k) { + d.push_back(TestType()); + } + + int removedElements = 0; // deterministicRandom()->randomInt(0, numElements); // FIXME: the overhead factor + // does not accurately account for removal! 
+ for (int k = 0; k < removedElements; ++k) { + d.pop_front(); + } + + int64_t dequeBytes = DequeAllocatorStats::allocatedBytes + sizeof(std::deque); + int64_t insertedBytes = (numElements - removedElements) * sizeof(TestType); + double overheadFactor = + std::max(insertedBytes, dequeBytes - 10000) / + insertedBytes; // We subtract 10K here as an estimated upper bound for the fixed cost of an std::deque + // fprintf(stderr, "%d elements (%d inserted, %d removed):\n", numElements-removedElements, numElements, + // removedElements); fprintf(stderr, "Allocated %lld bytes to store %lld bytes (%lf overhead factor)\n", + // dequeBytes, insertedBytes, overheadFactor); + ASSERT(overheadFactor * 1024 <= SERVER_KNOBS->VERSION_MESSAGES_OVERHEAD_FACTOR_1024THS); + } + } + + return Void(); +} + } // namespace oldTLog_6_0 diff --git a/fdbserver/OldTLogServer_6_2.actor.cpp b/fdbserver/OldTLogServer_6_2.actor.cpp index 3cae30cf089..ffee3c91bb5 100644 --- a/fdbserver/OldTLogServer_6_2.actor.cpp +++ b/fdbserver/OldTLogServer_6_2.actor.cpp @@ -3374,4 +3374,40 @@ struct DequeAllocator : std::allocator { } }; +TEST_CASE("Lfdbserver/tlogserver/VersionMessagesOverheadFactor") { + + typedef std::pair TestType; // type used by versionMessages + + for (int i = 1; i < 9; ++i) { + for (int j = 0; j < 20; ++j) { + DequeAllocatorStats::allocatedBytes = 0; + DequeAllocator allocator; + std::deque> d(allocator); + + int numElements = deterministicRandom()->randomInt(pow(10, i - 1), pow(10, i)); + for (int k = 0; k < numElements; ++k) { + d.push_back(TestType()); + } + + int removedElements = 0; // deterministicRandom()->randomInt(0, numElements); // FIXME: the overhead factor + // does not accurately account for removal! 
+ for (int k = 0; k < removedElements; ++k) { + d.pop_front(); + } + + int64_t dequeBytes = DequeAllocatorStats::allocatedBytes + sizeof(std::deque); + int64_t insertedBytes = (numElements - removedElements) * sizeof(TestType); + double overheadFactor = + std::max(insertedBytes, dequeBytes - 10000) / + insertedBytes; // We subtract 10K here as an estimated upper bound for the fixed cost of an std::deque + // fprintf(stderr, "%d elements (%d inserted, %d removed):\n", numElements-removedElements, numElements, + // removedElements); fprintf(stderr, "Allocated %lld bytes to store %lld bytes (%lf overhead factor)\n", + // dequeBytes, insertedBytes, overheadFactor); + ASSERT(overheadFactor * 1024 <= SERVER_KNOBS->VERSION_MESSAGES_OVERHEAD_FACTOR_1024THS); + } + } + + return Void(); +} + } // namespace oldTLog_6_2 diff --git a/tests/rare/TLogVersionMessagesOverheadFactor.toml b/tests/rare/TLogVersionMessagesOverheadFactor.toml index 231d3fbbf06..c87e0721a58 100644 --- a/tests/rare/TLogVersionMessagesOverheadFactor.toml +++ b/tests/rare/TLogVersionMessagesOverheadFactor.toml @@ -5,5 +5,4 @@ startDelay = 0 [[test.workload]] testName = 'UnitTests' - maxTestCases = 1 testsMatching = 'Lfdbserver/tlogserver/VersionMessagesOverheadFactor' \ No newline at end of file From 0779512c3bb38aeaf022f345fd164b3fce81bc9b Mon Sep 17 00:00:00 2001 From: Markus Pilman Date: Mon, 8 Nov 2021 15:54:18 -0700 Subject: [PATCH 63/69] added file to run perf unit tests --- tests/PerfUnitTests.toml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tests/PerfUnitTests.toml diff --git a/tests/PerfUnitTests.toml b/tests/PerfUnitTests.toml new file mode 100644 index 00000000000..50648eb3dc5 --- /dev/null +++ b/tests/PerfUnitTests.toml @@ -0,0 +1,9 @@ +[[test]] +testTitle = 'PerfUnitTests' +useDB = false +startDelay = 0 + + [[test.workload]] + testName = 'UnitTests' + #maxTestCases = 1 + testsMatching = '#' \ No newline at end of file From ca6cce1988d5b4fceec4c373065f15dcf1bcb685 Mon Sep 17 
00:00:00 2001 From: Pierre Zemb Date: Tue, 9 Nov 2021 22:05:12 +0100 Subject: [PATCH 64/69] Fix typo in fdb.options (#5938) --- fdbclient/vexillographer/fdb.options | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fdbclient/vexillographer/fdb.options b/fdbclient/vexillographer/fdb.options index 996fae6dc79..887268b40f1 100644 --- a/fdbclient/vexillographer/fdb.options +++ b/fdbclient/vexillographer/fdb.options @@ -58,7 +58,7 @@ description is not currently required but encouraged. paramType="String" paramDescription="The identifier that will be part of all trace file names" description="Once provided, this string will be used to replace the port/PID in the log file names." />