From f6905ce81ff95687d2d570b433379d45aed7c6e6 Mon Sep 17 00:00:00 2001 From: Amit Nagler <58042354+naglera@users.noreply.github.com> Date: Tue, 27 Aug 2024 20:18:48 +0300 Subject: [PATCH] Remove `dual-channel-replication` Feature Flag's Protection (#908) Currently, the `dual-channel-replication` feature flag is immutable if `enable-protected-configs` is enabled, which is the default behavior. This PR proposes to make the `dual-channel-replication` flag mutable, allowing it to be changed dynamically without restarting the cluster. **Motivation:** The ability to change the `dual-channel-replication` flag dynamically is essential for testing and validating the feature on real clusters running in production environments. By making the flag mutable, we can enable or disable the feature without disrupting the cluster's operations, facilitating easier testing and experimentation. Additionally, this change would provide more flexibility for users to enable or disable the feature based on their specific requirements or operational needs without requiring a cluster restart. --------- Signed-off-by: naglera --- src/config.c | 2 +- .../integration/dual-channel-replication.tcl | 58 +++++++++++++++++++ valkey.conf | 5 ++ 3 files changed, 64 insertions(+), 1 deletion(-) diff --git a/src/config.c b/src/config.c index a4896d6cb6..e4122497fb 100644 --- a/src/config.c +++ b/src/config.c @@ -3074,7 +3074,7 @@ standardConfig static_configs[] = { createBoolConfig("lazyfree-lazy-user-flush", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.lazyfree_lazy_user_flush, 0, NULL, NULL), createBoolConfig("repl-disable-tcp-nodelay", NULL, MODIFIABLE_CONFIG, server.repl_disable_tcp_nodelay, 0, NULL, NULL), createBoolConfig("repl-diskless-sync", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.repl_diskless_sync, 1, NULL, NULL), - createBoolConfig("dual-channel-replication-enabled", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG | PROTECTED_CONFIG, server.dual_channel_replication, 0, NULL, NULL), + createBoolConfig("dual-channel-replication-enabled", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.dual_channel_replication, 0, NULL, NULL), createBoolConfig("aof-rewrite-incremental-fsync", NULL, MODIFIABLE_CONFIG, server.aof_rewrite_incremental_fsync, 1, NULL, NULL), createBoolConfig("no-appendfsync-on-rewrite", NULL, MODIFIABLE_CONFIG, server.aof_no_fsync_on_rewrite, 0, NULL, NULL), createBoolConfig("cluster-require-full-coverage", NULL, MODIFIABLE_CONFIG, server.cluster_require_full_coverage, 1, NULL, NULL), diff --git a/tests/integration/dual-channel-replication.tcl b/tests/integration/dual-channel-replication.tcl index da5e521dd2..f8c9eae44d 100644 --- a/tests/integration/dual-channel-replication.tcl +++ b/tests/integration/dual-channel-replication.tcl @@ -199,6 +199,7 @@ start_server {tags {"dual-channel-replication external:skip"}} { test "Toggle dual-channel-replication-enabled: $enable start" { populate 1000 primary 10000 + $primary config set rdb-key-save-delay 0 set prev_sync_full [s 0 sync_full] set prev_sync_partial [s 0 sync_partial_ok] @@ -251,6 +252,63 @@ start_server {tags {"dual-channel-replication external:skip"}} { } $replica replicaof no one } + + foreach test_instance {primary replica} { + $primary config set dual-channel-replication-enabled $enable + $replica config set dual-channel-replication-enabled $enable + test "Online toggle dual-channel-replication-enabled on $test_instance, starting with '$enable'" { + populate 1000 primary 10000 + $primary config set rdb-key-save-delay 100000 + + $replica replicaof $primary_host $primary_port + # wait for sync to start + if {$enable == "yes"} { + wait_for_condition 500 1000 { + [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]] && + [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]] && + [s 0 rdb_bgsave_in_progress] eq 1 + } else { + fail "replica didn't start a dual-channel sync session in time" + } + } else { + wait_for_condition 500 1000 { + [string match "*slave*,state=wait_bgsave*,type=replica*" [$primary info replication]] && + [s 0 rdb_bgsave_in_progress] eq 1 + } else { + fail "replica didn't start a normal full sync session in time" + } + } + # Toggle config + set new_value "yes" + if {$enable == "yes"} { + set new_value "no" + } + set instance $primary + if {$test_instance == "replica"} { + set instance $replica + } + $instance config set dual-channel-replication-enabled $new_value + # Wait for at least one server cron + after 1000 + + if {$enable == "yes"} { + # Verify that dual channel replication sync is still in progress + assert [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]] + assert [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]] + assert {[s 0 rdb_bgsave_in_progress] eq 1} + } else { + # Verify that normal sync is still in progress + assert [string match "*slave*,state=wait_bgsave*,type=replica*" [$primary info replication]] + assert {[s 0 rdb_bgsave_in_progress] eq 1} + } + $replica replicaof no one + wait_for_condition 500 1000 { + [s -1 rdb_bgsave_in_progress] eq 0 + } else { + fail "Primary should abort sync" + } + } + } } } } diff --git a/valkey.conf b/valkey.conf index fef152a290..2f1d1315cc 100644 --- a/valkey.conf +++ b/valkey.conf @@ -688,6 +688,11 @@ repl-diskless-load disabled # memory to accommodate the buffer during synchronization. However, this tradeoff is # generally beneficial as it prevents potential performance degradation on the primary # server, which is typically handling more critical operations. +# +# When toggling this configuration on or off during an ongoing synchronization process, +# it does not change the already running sync method. The new configuration will take +# effect only for subsequent synchronization processes. + dual-channel-replication-enabled no # Master send PINGs to its replicas in a predefined interval. It's possible to