Skip to content

Commit

Permalink
Hide repl-rdb-channel config, . Clarify fallback point. Better utiliz…
Browse files Browse the repository at this point in the history
…e master_supports_rdb_channel
  • Loading branch information
naglera committed Feb 6, 2024
1 parent e44bc69 commit 5fc0be9
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 39 deletions.
21 changes: 0 additions & 21 deletions redis.conf
Original file line number Diff line number Diff line change
Expand Up @@ -660,27 +660,6 @@ repl-diskless-sync-max-replicas 0
# during replication.
repl-diskless-load disabled

# In full sync, Master can accumulate replication data or send it with the replica rdb,
# to accumulated there. By enabling the RDB channel (on the replica), the master's
# memory load is moved to the syncing replica. Additionally, by using rdb channel, the
# bgsave process streams the snapshot directly to the replica, thereby reducing the CPU
# load on the master main process.
#
# In master disk-based sync, RDB channel sync is not permitted because it may cause the
# master to start a new sync session for every replica, even if their sync requests are
# close together.
#
repl-rdb-channel no

# The loading-process-events-interval-ms configuration parameter in Redis controls the interval,
# in milliseconds, at which a replica responds to client's queries during the process of loading
# its buffer into memory.
# When a replica is loading its buffer into memory, it needs to strike a balance between
# processing client queries and completing the loading process efficiently. The
# loading-process-events-interval-ms setting allows administrators to configure the time
# interval at which the replica will respond to client's queries during the loading process.
loading-process-events-interval-ms 100

# Master send PINGs to its replicas in a predefined interval. It's possible to
# change this interval with the repl_ping_replica_period option. The default
# value is 10 seconds.
Expand Down
2 changes: 1 addition & 1 deletion src/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -3046,7 +3046,7 @@ standardConfig static_configs[] = {
createBoolConfig("lazyfree-lazy-user-flush", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.lazyfree_lazy_user_flush , 0, NULL, NULL),
createBoolConfig("repl-disable-tcp-nodelay", NULL, MODIFIABLE_CONFIG, server.repl_disable_tcp_nodelay, 0, NULL, NULL),
createBoolConfig("repl-diskless-sync", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.repl_diskless_sync, 1, NULL, NULL),
createBoolConfig("repl-rdb-channel", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.rdb_channel_enabled, 0, NULL, NULL),
createBoolConfig("repl-rdb-channel", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG | HIDDEN_CONFIG, server.rdb_channel_enabled, 0, NULL, NULL),
createBoolConfig("aof-rewrite-incremental-fsync", NULL, MODIFIABLE_CONFIG, server.aof_rewrite_incremental_fsync, 1, NULL, NULL),
createBoolConfig("no-appendfsync-on-rewrite", NULL, MODIFIABLE_CONFIG, server.aof_no_fsync_on_rewrite, 0, NULL, NULL),
createBoolConfig("cluster-require-full-coverage", NULL, MODIFIABLE_CONFIG, server.cluster_require_full_coverage, 1, NULL, NULL),
Expand Down
36 changes: 23 additions & 13 deletions src/replication.c
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,7 @@ void syncCommand(client *c) {
} else if (isReplicaMainChannel(c)) {
char buf[128];
int buflen;
serverLog(LL_NOTICE,"Replica %s is marked as no-fullsync, and psync isn't possible. Full sync will continue with dedicated RDB connection.", replicationGetSlaveName(c));
serverLog(LL_NOTICE,"Replica %s is marked as main-conn, and psync isn't possible. Full sync will continue with dedicated RDB connection.", replicationGetSlaveName(c));
buflen = snprintf(buf,sizeof(buf),"-FULLSYNCNEEDED\r\n");
if (connWrite(c->conn,buf,buflen) != buflen) {
freeClientAsync(c);
Expand Down Expand Up @@ -1321,11 +1321,12 @@ void replconfCommand(client *c) {
c->flags &= ~CLIENT_REPL_RDB_CHANNEL;
c->slave_req &= ~SLAVE_REQ_RDB_CHANNEL;
}
} else if (!strcasecmp(c->argv[j]->ptr, "no-fullsync")) {
/* REPLCONF no-fullsync is used to identify the client wants
* to partial sync if possible. If not, do not auto
* trigger full sync. We use it during rdb channel sync
* to mark the main connection. */
} else if (!strcasecmp(c->argv[j]->ptr, "main-conn") && server.rdb_channel_enabled) {
/* REPLCONF main-conn is used to identify the replica main connection durig
* rdb-connection sync. It also means that if psync is impossible, master
* should not auto trigger full sync.
* If rdb-channel is disable on this master, treat this command as unrecognized
* replconf option. */
long no_fullsync = 0;
if (getRangeLongFromObjectOrReply(c, c->argv[j +1],
0, 1, &no_fullsync, NULL) != C_OK) {
Expand Down Expand Up @@ -3092,7 +3093,7 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) {
}

if (!strncmp(reply, "-FULLSYNCNEEDED", 15)) {
/* In case the main connection with master is at no-fullsync mode, the master
/* In case the main connection with master is at main-conn mode, the master
* will respond with -FULLSYNCNEEDED to imply that psync is not possible */
serverLog(LL_NOTICE, "PSYNC is not possible, initialize RDB channel.");
sdsfree(reply);
Expand Down Expand Up @@ -3243,9 +3244,11 @@ void syncWithMaster(connection *conn) {
if (err) goto write_error;
}

/* Mark main connection for psync only, in case we use rdb-channel for sync*/
/* When using rdb-channel for sync, mark the main connection only for psync.
* The master's capabilities will also be verified here, since if the master
* does not support rdb-channel sync, it will not understand this command. */
if (server.rdb_channel_enabled) {
err = sendCommand(conn,"REPLCONF", "no-fullsync", "1", NULL);
err = sendCommand(conn,"REPLCONF", "main-conn", "1", NULL);
}

/* Inform the master of our (slave) capabilities.
Expand Down Expand Up @@ -3320,9 +3323,16 @@ void syncWithMaster(connection *conn) {
if (server.repl_state == REPL_STATE_RECEIVE_NO_FULLSYNC_REPLY) {
err = receiveSynchronousResponse(conn);
if (err == NULL) goto rdb_conn_error;
if (err[0] == '-') {
serverLog(LL_WARNING, "Master does not understand REPLCONF no-fullsync aborting rdb-channel sync %s", err);
else if (err[0] == '-') {
/* Activate rdb-channel fallback mechanism. The master did not understand
* REPLCONF main-conn. This means the master does not support RDB channel
* synchronization. The replica will continue the sync session with one
* channel (normally). */
serverLog(LL_WARNING, "Master does not understand REPLCONF main-conn aborting rdb-channel sync %s", err);
server.master_supports_rdb_channel = 0;
} else if (memcmp(err, "+OK", 3) == 0) {
/* Master support rdb-connection sync. Continue with rdb-channel approach. */
server.master_supports_rdb_channel = 1;
}
sdsfree(err);
server.repl_state = REPL_STATE_RECEIVE_CAPA_REPLY;
Expand Down Expand Up @@ -3434,7 +3444,7 @@ void syncWithMaster(connection *conn) {
server.repl_transfer_fd = dfd;
}

if (server.rdb_channel_enabled && server.master_supports_rdb_channel) {
if (server.master_supports_rdb_channel > 0) {
/* Create a full sync connection */
server.repl_rdb_transfer_s = connCreate(connTypeOfReplication());
if (connConnect(server.repl_rdb_transfer_s, server.masterhost, server.masterport,
Expand Down Expand Up @@ -3631,7 +3641,7 @@ void replicationSetMaster(char *ip, int port) {
server.repl_state = REPL_STATE_CONNECT;
/* Allow trying rdb-channel sync with the new master. If new master doesn't
* support rdb-channel sync, we will set to 0 afterwards. */
server.master_supports_rdb_channel = 1;
server.master_supports_rdb_channel = -1;
serverLog(LL_NOTICE,"Connecting to MASTER %s:%d",
server.masterhost, server.masterport);
connectWithMaster();
Expand Down
2 changes: 1 addition & 1 deletion src/server.c
Original file line number Diff line number Diff line change
Expand Up @@ -2661,7 +2661,7 @@ void initServer(void) {
server.cron_malloc_stats.allocator_active = 0;
server.cron_malloc_stats.allocator_resident = 0;
server.lastbgsave_status = C_OK;
server.master_supports_rdb_channel = 1;
server.master_supports_rdb_channel = -1;
server.aof_last_write_status = C_OK;
server.aof_last_write_errno = 0;
server.repl_good_slaves_count = 0;
Expand Down
3 changes: 2 additions & 1 deletion src/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -1820,7 +1820,8 @@ struct redisServer {
int rdb_bgsave_scheduled; /* BGSAVE when possible if true. */
int rdb_child_type; /* Type of save by active child. */
int lastbgsave_status; /* C_OK or C_ERR */
int master_supports_rdb_channel;/* Track whether the master is able to sync using rdb channel */
int master_supports_rdb_channel;/* Track whether the master is able to sync using rdb channel.
* -1 = unknown, 0 = no, 1 = yes. */
int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
int rdb_pipe_read; /* RDB pipe used to transfer the rdb data */
/* to the parent process in diskless repl. */
Expand Down
8 changes: 6 additions & 2 deletions tests/integration/replication.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -1474,6 +1474,7 @@ start_server {tags {"repl rdb-channel external:skip"}} {
# operation, so that the replica remains in the handshake state.
$master config set repl-diskless-sync yes
$master config set repl-diskless-sync-delay 1000
$master config set repl-rdb-channel yes

# Start the replication process...
$replica config set repl-rdb-channel yes
Expand Down Expand Up @@ -1526,6 +1527,7 @@ start_server {tags {"repl rdb-channel external:skip"}} {
set master_port [srv 0 port]

$master config set rdb-key-save-delay 200
$master config set repl-rdb-channel yes
$replica config set repl-rdb-channel yes
$replica config set repl-diskless-sync no

Expand Down Expand Up @@ -1675,6 +1677,7 @@ start_server {tags {"repl rdb-channel external:skip"}} {

$master config set repl-diskless-sync yes
$master config set repl-diskless-sync-delay 5; # allow both replicas to ask for sync
$master config set repl-rdb-channel yes

$replica1 config set repl-rdb-channel yes
$replica2 config set repl-rdb-channel yes
Expand Down Expand Up @@ -1775,9 +1778,9 @@ start_server {tags {"repl rdb-channel external:skip"}} {

# Wait for sync to fail
wait_for_condition 100 50 {
[log_file_matches $replica1_log "*-ERR Rdb channel sync is not allowed when repl-diskless-sync disabled*"]
[log_file_matches $replica1_log "*Master does not understand REPLCONF main-conn*"]
} else {
fail "Sync should fail"
fail "rdb-connection sync rollback should have been triggered."
}

# Wait for mitigation and resync
Expand Down Expand Up @@ -1806,6 +1809,7 @@ start_server {tags {"repl rdb-channel external:skip"}} {

$master config set repl-diskless-sync yes
$master debug sleep-after-fork 5;# Stop master after fork
$master config set repl-rdb-channel yes

$replica config set repl-rdb-channel yes
$replica config set loglevel debug
Expand Down

0 comments on commit 5fc0be9

Please sign in to comment.