Skip to content

Commit

Permalink
Add A-A failover scenario test (#3935)
Browse files Browse the repository at this point in the history
* Add A-A failover scenario test

* Update src/main/java/redis/clients/jedis/providers/MultiClusterPooledConnectionProvider.java

Co-authored-by: M Sazzadul Hoque <7600764+sazzad16@users.noreply.github.com>

* Update src/test/java/redis/clients/jedis/scenario/ActiveActiveFailoverTest.java

Co-authored-by: M Sazzadul Hoque <7600764+sazzad16@users.noreply.github.com>

* Add missing import

---------

Co-authored-by: M Sazzadul Hoque <7600764+sazzad16@users.noreply.github.com>
  • Loading branch information
uglide and sazzad16 authored Sep 5, 2024
1 parent 4c09472 commit 708405b
Show file tree
Hide file tree
Showing 5 changed files with 251 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import redis.clients.jedis.*;
import redis.clients.jedis.MultiClusterClientConfig.ClusterConfig;
import redis.clients.jedis.annots.Experimental;
import redis.clients.jedis.annots.VisibleForTesting;
import redis.clients.jedis.exceptions.JedisConnectionException;
import redis.clients.jedis.exceptions.JedisValidationException;
import redis.clients.jedis.util.Pool;
Expand Down Expand Up @@ -299,6 +300,11 @@ public Cluster getCluster() {
return multiClusterMap.get(activeMultiClusterIndex);
}

@VisibleForTesting
public Cluster getCluster(int multiClusterIndex) {
return multiClusterMap.get(multiClusterIndex);
}

public CircuitBreaker getClusterCircuitBreaker() {
return multiClusterMap.get(activeMultiClusterIndex).getCircuitBreaker();
}
Expand Down
4 changes: 4 additions & 0 deletions src/test/java/redis/clients/jedis/EndpointConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ public HostAndPort getHostAndPort() {
return JedisURIHelper.getHostAndPort(endpoints.get(0));
}

public HostAndPort getHostAndPort(int index) {
return JedisURIHelper.getHostAndPort(endpoints.get(index));
}

public String getPassword() {
return password;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
package redis.clients.jedis.scenario;

import io.github.resilience4j.circuitbreaker.CircuitBreakerConfig;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import redis.clients.jedis.*;
import redis.clients.jedis.providers.MultiClusterPooledConnectionProvider;
import redis.clients.jedis.exceptions.JedisConnectionException;

import java.io.IOException;
import java.time.Duration;
import java.time.Instant;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;

import static org.junit.Assert.*;

public class ActiveActiveFailoverTest {
private static final Logger log = LoggerFactory.getLogger(ActiveActiveFailoverTest.class);

private static EndpointConfig endpoint;

private final FaultInjectionClient faultClient = new FaultInjectionClient();

@BeforeClass
public static void beforeClass() {
try {
ActiveActiveFailoverTest.endpoint = HostAndPorts.getRedisEndpoint("re-active-active");
} catch (IllegalArgumentException e) {
log.warn("Skipping test because no Redis endpoint is configured");
org.junit.Assume.assumeTrue(false);
}
}

@Test
public void testFailover() {

MultiClusterClientConfig.ClusterConfig[] clusterConfig = new MultiClusterClientConfig.ClusterConfig[2];

JedisClientConfig config = endpoint.getClientConfigBuilder()
.socketTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS)
.connectionTimeoutMillis(RecommendedSettings.DEFAULT_TIMEOUT_MS).build();

clusterConfig[0] = new MultiClusterClientConfig.ClusterConfig(endpoint.getHostAndPort(0),
config, RecommendedSettings.poolConfig);
clusterConfig[1] = new MultiClusterClientConfig.ClusterConfig(endpoint.getHostAndPort(1),
config, RecommendedSettings.poolConfig);

MultiClusterClientConfig.Builder builder = new MultiClusterClientConfig.Builder(clusterConfig);

builder.circuitBreakerSlidingWindowType(CircuitBreakerConfig.SlidingWindowType.TIME_BASED);
builder.circuitBreakerSlidingWindowSize(1); // SLIDING WINDOW SIZE IN SECONDS
builder.circuitBreakerSlidingWindowMinCalls(1);
builder.circuitBreakerFailureRateThreshold(10.0f); // percentage of failures to trigger circuit breaker

builder.retryWaitDuration(10);
builder.retryMaxAttempts(1);
builder.retryWaitDurationExponentialBackoffMultiplier(1);

class FailoverReporter implements Consumer<String> {

String currentClusterName = "not set";

boolean failoverHappened = false;

Instant failoverAt = null;

public String getCurrentClusterName() {
return currentClusterName;
}

@Override
public void accept(String clusterName) {
this.currentClusterName = clusterName;
log.info(
"\n\n====FailoverEvent=== \nJedis failover to cluster: {}\n====FailoverEvent===\n\n",
clusterName);

failoverHappened = true;
failoverAt = Instant.now();
}
}

MultiClusterPooledConnectionProvider provider = new MultiClusterPooledConnectionProvider(
builder.build());
FailoverReporter reporter = new FailoverReporter();
provider.setClusterFailoverPostProcessor(reporter);
provider.setActiveMultiClusterIndex(1);

UnifiedJedis client = new UnifiedJedis(provider);

AtomicLong retryingThreadsCounter = new AtomicLong(0);
AtomicLong failedCommandsAfterFailover = new AtomicLong(0);
AtomicReference<Instant> lastFailedCommandAt = new AtomicReference<>();

// Start thread that imitates an application that uses the client
MultiThreadedFakeApp fakeApp = new MultiThreadedFakeApp(client, (UnifiedJedis c) -> {

long threadId = Thread.currentThread().getId();

int attempt = 0;
int maxTries = 500;
int retryingDelay = 5;
while (true) {
try {
Map<String, String> executionInfo = new HashMap<String, String>() {{
put("threadId", String.valueOf(threadId));
put("cluster", reporter.getCurrentClusterName());
}};
client.xadd("execution_log", StreamEntryID.NEW_ENTRY, executionInfo);

if (attempt > 0) {
log.info("Thread {} recovered after {} ms. Threads still not recovered: {}", threadId,
attempt * retryingDelay, retryingThreadsCounter.decrementAndGet());
}

break;
} catch (JedisConnectionException e) {

if (reporter.failoverHappened) {
long failedCommands = failedCommandsAfterFailover.incrementAndGet();
lastFailedCommandAt.set(Instant.now());
log.warn(
"Thread {} failed to execute command after failover. Failed commands after failover: {}",
threadId, failedCommands);
}

if (attempt == 0) {
long failedThreads = retryingThreadsCounter.incrementAndGet();
log.warn("Thread {} failed to execute command. Failed threads: {}", threadId,
failedThreads);
}
try {
Thread.sleep(retryingDelay);
} catch (InterruptedException ie) {
throw new RuntimeException(ie);
}
if (++attempt == maxTries) throw e;
}
}
return true;
}, 18);
fakeApp.setKeepExecutingForSeconds(30);
Thread t = new Thread(fakeApp);
t.start();

HashMap<String, Object> params = new HashMap<>();
params.put("bdb_id", endpoint.getBdbId());
params.put("rlutil_command", "pause_bdb");

FaultInjectionClient.TriggerActionResponse actionResponse = null;

try {
log.info("Triggering bdb_pause");
actionResponse = faultClient.triggerAction("execute_rlutil_command", params);
} catch (IOException e) {
fail("Fault Injection Server error:" + e.getMessage());
}

log.info("Action id: {}", actionResponse.getActionId());
fakeApp.setAction(actionResponse);

try {
t.join();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}

ConnectionPool pool = provider.getCluster(1).getConnectionPool();

log.info("First connection pool state: active: {}, idle: {}", pool.getNumActive(),
pool.getNumIdle());
log.info("Full failover time: {} s",
Duration.between(reporter.failoverAt, lastFailedCommandAt.get()).getSeconds());

assertEquals(0, pool.getNumActive());
assertTrue(fakeApp.capturedExceptions().isEmpty());

client.close();
}

}
12 changes: 6 additions & 6 deletions src/test/java/redis/clients/jedis/scenario/FakeApp.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,18 @@

public class FakeApp implements Runnable {

private static final Logger log = LoggerFactory.getLogger(FakeApp.class);
protected static final Logger log = LoggerFactory.getLogger(FakeApp.class);

public void setKeepExecutingForSeconds(int keepExecutingForSeconds) {
this.keepExecutingForSeconds = keepExecutingForSeconds;
}

private int keepExecutingForSeconds = 60;
protected int keepExecutingForSeconds = 60;

private FaultInjectionClient.TriggerActionResponse actionResponse = null;
private final UnifiedJedis client;
private final ExecutedAction action;
private List<JedisException> exceptions = new ArrayList<>();
protected FaultInjectionClient.TriggerActionResponse actionResponse = null;
protected final UnifiedJedis client;
protected final ExecutedAction action;
protected List<JedisException> exceptions = new ArrayList<>();

@FunctionalInterface
public interface ExecutedAction {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package redis.clients.jedis.scenario;

import redis.clients.jedis.UnifiedJedis;
import redis.clients.jedis.exceptions.JedisConnectionException;

import java.time.Duration;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class MultiThreadedFakeApp extends FakeApp {

private final ExecutorService executorService;

public MultiThreadedFakeApp(UnifiedJedis client, FakeApp.ExecutedAction action, int numThreads) {
super(client, action);
this.executorService = Executors.newFixedThreadPool(numThreads);
}

@Override
public void run() {
log.info("Starting FakeApp");

int checkEachSeconds = 5;
int timeoutSeconds = 120;

while (actionResponse == null || !actionResponse.isCompleted(
Duration.ofSeconds(checkEachSeconds), Duration.ofSeconds(keepExecutingForSeconds),
Duration.ofSeconds(timeoutSeconds))) {
try {
executorService.submit(() -> action.run(client));
} catch (JedisConnectionException e) {
log.error("Error executing action", e);
exceptions.add(e);
}
}

executorService.shutdown();

try {
if (!executorService.awaitTermination(keepExecutingForSeconds, TimeUnit.SECONDS)) {
executorService.shutdownNow();
}
} catch (InterruptedException e) {
log.error("Error waiting for executor service to terminate", e);
}
}
}

0 comments on commit 708405b

Please sign in to comment.