From 0de1b501561f62366ac4057dde96c90845e97c8d Mon Sep 17 00:00:00 2001 From: naglera Date: Wed, 14 Aug 2024 10:32:20 +0000 Subject: [PATCH 1/6] Make `dual-channel-replication` feature flag mutable Currently, the `dual-channel-replication` feature flag is immutable if `enable-protected-configs` is enabled, which is the default behavior. This PR proposes to make the `dual-channel-replication` flag mutable, allowing it to be changed dynamically without restarting the cluster. Motivation: The ability to change the `dual-channel-replication` flag dynamically is essential for testing and validating the feature on real clusters running in production environments. By making the flag mutable, we can enable or disable the feature without disrupting the cluster's operations, facilitating easier testing and experimentation. Additionally, this change would provide more flexibility for users to enable or disable the feature based on their specific requirements or operational needs without requiring a cluster restart. Signed-off-by: naglera --- src/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.c b/src/config.c index afac82be6d..1d7781a963 100644 --- a/src/config.c +++ b/src/config.c @@ -3074,7 +3074,7 @@ standardConfig static_configs[] = { createBoolConfig("lazyfree-lazy-user-flush", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.lazyfree_lazy_user_flush, 0, NULL, NULL), createBoolConfig("repl-disable-tcp-nodelay", NULL, MODIFIABLE_CONFIG, server.repl_disable_tcp_nodelay, 0, NULL, NULL), createBoolConfig("repl-diskless-sync", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.repl_diskless_sync, 1, NULL, NULL), - createBoolConfig("dual-channel-replication-enabled", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG | PROTECTED_CONFIG, server.dual_channel_replication, 0, NULL, NULL), + createBoolConfig("dual-channel-replication-enabled", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.dual_channel_replication, 0, NULL, NULL), createBoolConfig("aof-rewrite-incremental-fsync", NULL, MODIFIABLE_CONFIG, server.aof_rewrite_incremental_fsync, 1, NULL, NULL), createBoolConfig("no-appendfsync-on-rewrite", NULL, MODIFIABLE_CONFIG, server.aof_no_fsync_on_rewrite, 0, NULL, NULL), createBoolConfig("cluster-require-full-coverage", NULL, MODIFIABLE_CONFIG, server.cluster_require_full_coverage, 1, NULL, NULL), From 64e0bbd753a1b9832342166c87fb988ece470b70 Mon Sep 17 00:00:00 2001 From: naglera Date: Mon, 19 Aug 2024 08:53:37 +0000 Subject: [PATCH 2/6] Enhance dual-channel feature flag toggle tests turn on/off during the sync Signed-off-by: naglera --- .../integration/dual-channel-replication.tcl | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/integration/dual-channel-replication.tcl b/tests/integration/dual-channel-replication.tcl index 426d64442c..a68a5d501c 100644 --- a/tests/integration/dual-channel-replication.tcl +++ b/tests/integration/dual-channel-replication.tcl @@ -199,6 +199,7 @@ start_server {tags {"dual-channel-replication external:skip"}} { test "Toggle dual-channel-replication-enabled: $enable start" { populate 1000 primary 10000 + $primary config set rdb-key-save-delay 0 set prev_sync_full [s 0 sync_full] set prev_sync_partial [s 0 sync_partial_ok] @@ -251,6 +252,63 @@ start_server {tags {"dual-channel-replication external:skip"}} { } $replica replicaof no one } + + foreach test_instance {primary replica} { + $primary config set dual-channel-replication-enabled $enable + $replica config set dual-channel-replication-enabled $enable + test "Online toggle dual-channel-replication-enabled: $enable start, toggle on $test_instance" { + populate 1000 primary 10000 + $primary config set rdb-key-save-delay 100000 + + $replica replicaof $primary_host $primary_port + # wait for sync to start + if {$enable == "yes"} { + wait_for_condition 500 1000 { + [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]] && + [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]] && + [s 0 rdb_bgsave_in_progress] eq 1 + } else { + fail "replica didn't start sync session in time" + } + } else { + wait_for_condition 500 1000 { + [string match "*slave*,state=wait_bgsave*,type=replica*" [$primary info replication]] && + [s 0 rdb_bgsave_in_progress] eq 1 + } else { + fail "replica didn't start sync session in time" + } + } + # Toggle config + set new_value "yes" + if {$enable == "yes"} { + set new_value "no" + } + set instance $primary + if {$test_instance == "replica"} { + set instnace $replica + } + $instance config set dual-channel-replication-enabled $new_value + # Wait for at least one server cron + after 1 + + if {$enable == "yes"} { + # Verify that dual channel replication sync is still in progress + assert [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]] + assert [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]] + assert {[s 0 rdb_bgsave_in_progress] eq 1} + } else { + # Verify that normal sync is still in progress + assert [string match "*slave*,state=wait_bgsave*,type=replica*" [$primary info replication]] + assert {[s 0 rdb_bgsave_in_progress] eq 1} + } + $replica replicaof no one + wait_for_condition 500 1000 { + [s -1 rdb_bgsave_in_progress] eq 0 + } else { + fail "Primary should abort sync" + } + } + } } } } From 8a7d37de64cb59f15942c0c96eb43d5526115280 Mon Sep 17 00:00:00 2001 From: Amit Nagler <58042354+naglera@users.noreply.github.com> Date: Mon, 19 Aug 2024 15:01:32 +0300 Subject: [PATCH 3/6] Update tests/integration/dual-channel-replication.tcl Signed-off-by: Amit Nagler <58042354+naglera@users.noreply.github.com> --- tests/integration/dual-channel-replication.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/dual-channel-replication.tcl b/tests/integration/dual-channel-replication.tcl index a68a5d501c..7b7a725663 100644 --- a/tests/integration/dual-channel-replication.tcl +++ b/tests/integration/dual-channel-replication.tcl @@ -285,7 +285,7 @@ start_server {tags {"dual-channel-replication external:skip"}} { } set instance $primary if {$test_instance == "replica"} { - set instnace $replica + set instance $replica } $instance config set dual-channel-replication-enabled $new_value # Wait for at least one server cron From 93b521ceb718b1ac282eec8aad863ab932e8f9b3 Mon Sep 17 00:00:00 2001 From: naglera Date: Mon, 19 Aug 2024 15:03:09 +0000 Subject: [PATCH 4/6] update configuration docs Signed-off-by: naglera --- valkey.conf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/valkey.conf b/valkey.conf index 68f4ad1f72..2cc1827de1 100644 --- a/valkey.conf +++ b/valkey.conf @@ -687,6 +687,12 @@ repl-diskless-load disabled # memory to accommodate the buffer during synchronization. However, this tradeoff is # generally beneficial as it prevents potential performance degradation on the primary # server, which is typically handling more critical operations. +# +# Additionally, it's important to note that when toggling this configuration on or off +# during an ongoing synchronization process, it does not change the already running +# sync method. The new configuration will take effect only for subsequent +# synchronization processes. + dual-channel-replication-enabled no # Master send PINGs to its replicas in a predefined interval. It's possible to From 7e3edcc8e6afa0baaa0e5267bc5e72cf91b05d23 Mon Sep 17 00:00:00 2001 From: naglera Date: Sun, 25 Aug 2024 06:11:32 +0000 Subject: [PATCH 5/6] Fix comments Signed-off-by: naglera --- valkey.conf | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/valkey.conf b/valkey.conf index 2cc1827de1..af67479ff4 100644 --- a/valkey.conf +++ b/valkey.conf @@ -688,10 +688,9 @@ repl-diskless-load disabled # generally beneficial as it prevents potential performance degradation on the primary # server, which is typically handling more critical operations. # -# Additionally, it's important to note that when toggling this configuration on or off -# during an ongoing synchronization process, it does not change the already running -# sync method. The new configuration will take effect only for subsequent -# synchronization processes. +# When toggling this configuration on or off during an ongoing synchronization process, +# it does not change the already running sync method. The new configuration will take +# effect only for subsequent synchronization processes. dual-channel-replication-enabled no From cdebc55ebfe5aaad16ddb9cc334415493a92bae8 Mon Sep 17 00:00:00 2001 From: Amit Nagler <58042354+naglera@users.noreply.github.com> Date: Tue, 27 Aug 2024 11:26:51 +0300 Subject: [PATCH 6/6] Apply suggestions from code review Co-authored-by: Ping Xie Signed-off-by: Amit Nagler <58042354+naglera@users.noreply.github.com> --- tests/integration/dual-channel-replication.tcl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/dual-channel-replication.tcl b/tests/integration/dual-channel-replication.tcl index 7b7a725663..f324e58034 100644 --- a/tests/integration/dual-channel-replication.tcl +++ b/tests/integration/dual-channel-replication.tcl @@ -256,7 +256,7 @@ start_server {tags {"dual-channel-replication external:skip"}} { foreach test_instance {primary replica} { $primary config set dual-channel-replication-enabled $enable $replica config set dual-channel-replication-enabled $enable - test "Online toggle dual-channel-replication-enabled: $enable start, toggle on $test_instance" { + test "Online toggle dual-channel-replication-enabled on $test_instance, starting with '$enable'" { populate 1000 primary 10000 $primary config set rdb-key-save-delay 100000 @@ -268,14 +268,14 @@ start_server {tags {"dual-channel-replication external:skip"}} { [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]] && [s 0 rdb_bgsave_in_progress] eq 1 } else { - fail "replica didn't start sync session in time" + fail "replica didn't start a dual-channel sync session in time" } } else { wait_for_condition 500 1000 { [string match "*slave*,state=wait_bgsave*,type=replica*" [$primary info replication]] && [s 0 rdb_bgsave_in_progress] eq 1 } else { - fail "replica didn't start sync session in time" + fail "replica didn't start a normal full sync session in time" } } # Toggle config @@ -289,7 +289,7 @@ start_server {tags {"dual-channel-replication external:skip"}} { } $instance config set dual-channel-replication-enabled $new_value # Wait for at least one server cron - after 1 + after 1000 if {$enable == "yes"} { # Verify that dual channel replication sync is still in progress