Skip to content

Commit

Permalink
Restart connection recovery if no running stream member for consumer
Browse files Browse the repository at this point in the history
  • Loading branch information
acogoluegnes committed Dec 19, 2024
1 parent 771aa64 commit 5680cde
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 16 deletions.
18 changes: 18 additions & 0 deletions src/main/java/com/rabbitmq/client/amqp/impl/AmqpConnection.java
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,24 @@ private void recoverConsumers() throws InterruptedException {
consumer.queue(),
ex);
throw ex;
} catch (AmqpException.AmqpResourceClosedException ex) {
if (ExceptionUtils.noRunningStreamMemberOnNode(ex)) {
LOGGER.warn(
"Could not recover consumer {} (queue '{}') because there is "
+ "running stream member on the node, restarting recovery",
consumer.id(),
consumer.queue(),
ex);
throw new AmqpException.AmqpConnectionException(
"No running stream member on the node", ex);
} else {
LOGGER.warn(
"Error while trying to recover consumer {} (queue '{}')",
consumer.id(),
consumer.queue(),
ex);
failedConsumers.add(consumer);
}
} catch (Exception ex) {
LOGGER.warn(
"Error while trying to recover consumer {} (queue '{}')",
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/com/rabbitmq/client/amqp/impl/ExceptionUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,17 @@ static boolean unauthorizedAccess(ClientResourceRemotelyClosedException e) {
return isUnauthorizedAccess(e.getErrorCondition());
}

static boolean noRunningStreamMemberOnNode(AmqpException e) {
if (e instanceof AmqpException.AmqpResourceClosedException) {
String message = e.getMessage();
return message != null
&& message.contains("stream queue")
&& message.contains("does not have a running replica on the local node");
} else {
return false;
}
}

private static boolean isUnauthorizedAccess(ErrorCondition errorCondition) {
return errorConditionEquals(errorCondition, ERROR_UNAUTHORIZED_ACCESS);
}
Expand Down
4 changes: 0 additions & 4 deletions src/test/java/com/rabbitmq/client/amqp/impl/Cli.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,6 @@ static ProcessState rabbitmqStreams(String command) {
return executeCommand(rabbitmqStreamsCommand() + " " + command);
}

static ProcessState rabbitmqUpgrade(String command) {
return executeCommand(rabbitmqUpgradeCommand() + " " + command);
}

static ProcessState rabbitmqctlIgnoreError(String command) {
return executeCommand(rabbitmqctlCommand() + " " + command, true);
}
Expand Down
63 changes: 57 additions & 6 deletions src/test/java/com/rabbitmq/client/amqp/impl/ClusterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,26 @@
import static com.rabbitmq.client.amqp.ConnectionSettings.Affinity.Operation.CONSUME;
import static com.rabbitmq.client.amqp.ConnectionSettings.Affinity.Operation.PUBLISH;
import static com.rabbitmq.client.amqp.impl.Assertions.assertThat;
import static com.rabbitmq.client.amqp.impl.ExceptionUtils.noRunningStreamMemberOnNode;
import static com.rabbitmq.client.amqp.impl.TestUtils.*;
import static java.time.Duration.ofMillis;
import static java.time.Duration.ofSeconds;
import static java.util.Arrays.stream;
import static java.util.stream.Collectors.toList;
import static java.util.stream.IntStream.range;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import com.rabbitmq.client.amqp.*;
import com.rabbitmq.client.amqp.AmqpException.AmqpResourceClosedException;
import com.rabbitmq.client.amqp.impl.TestUtils.Sync;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import org.assertj.core.api.Condition;
import org.junit.jupiter.api.*;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
Expand Down Expand Up @@ -440,7 +444,7 @@ void connectionShouldBeOnOwningNodeWhenAffinityIsActivatedForClassicQueues(TestI
List<String> names = range(0, URIS.length).mapToObj(ignored -> name(info)).collect(toList());
try {
List<Connection> connections =
Arrays.stream(URIS)
stream(URIS)
.map(
uri ->
connection(
Expand Down Expand Up @@ -479,6 +483,57 @@ void connectionShouldBeOnOwningNodeWhenAffinityIsActivatedForClassicQueues(TestI
}
}

@Test
void consumerOnNodeWithoutStreamMemberShouldThrow() {
List<AmqpConnection> connections = List.of();
try {
int memberCount = URIS.length - 1;
this.management.queue(this.name).stream().initialMemberCount(memberCount).queue().declare();
waitAtMost(() -> this.management.queueInfo(this.name).members().size() == memberCount);

List<String> members = this.management.queueInfo(this.name).members();

connections =
stream(URIS)
.map(uri -> (AmqpConnection) this.environment.connectionBuilder().uri(uri).build())
.collect(toList());

Connection cWithMember =
connections.stream()
.filter(c -> members.contains(c.connectionNodename()))
.findAny()
.get();
cWithMember
.consumerBuilder()
.queue(this.name)
.messageHandler((ctx, msg) -> ctx.accept())
.build();

Connection cWithNoMember =
connections.stream()
.filter(c -> !members.contains(c.connectionNodename()))
.findAny()
.get();

assertThatThrownBy(
() ->
cWithNoMember
.consumerBuilder()
.queue(this.name)
.messageHandler((ctx, msg) -> ctx.accept())
.build())
.isInstanceOf(AmqpResourceClosedException.class)
.is(
new Condition<>(
e -> noRunningStreamMemberOnNode((AmqpException) e),
"detected as a no-running-stream-member-on-connection-node exception"));

} finally {
this.connection.management().queueDeletion().delete(this.name);
connections.forEach(Connection::close);
}
}

String moveQqLeader() {
String initialLeader = deleteQqLeader();
addQqMember(initialLeader);
Expand All @@ -491,10 +546,6 @@ String deleteQqLeader() {
return deleteLeader(this::deleteQqMember);
}

String deleteStreamLeader() {
return deleteLeader(leader -> Cli.deleteStreamMember(q, leader));
}

String deleteLeader(Consumer<String> deleteMemberOperation) {
Management.QueueInfo info = queueInfo();
String initialLeader = info.leader();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import static org.assertj.core.api.Assertions.assertThat;

import com.rabbitmq.client.amqp.AmqpException;
import com.rabbitmq.client.amqp.AmqpException.AmqpConnectionException;
import com.rabbitmq.client.amqp.AmqpException.AmqpResourceClosedException;
import javax.net.ssl.SSLException;
import org.apache.qpid.protonj2.client.ErrorCondition;
import org.apache.qpid.protonj2.client.exceptions.*;
Expand All @@ -39,23 +41,23 @@ void convertTest() {
convert(new ClientSessionRemotelyClosedException("", errorCondition(ERROR_NOT_FOUND))))
.isInstanceOf(AmqpException.AmqpEntityDoesNotExistException.class);
assertThat(convert(new ClientSessionRemotelyClosedException("")))
.isInstanceOf(AmqpException.AmqpResourceClosedException.class);
.isInstanceOf(AmqpResourceClosedException.class);
assertThat(convert(new ClientLinkRemotelyClosedException("", errorCondition(ERROR_NOT_FOUND))))
.isInstanceOf(AmqpException.AmqpEntityDoesNotExistException.class);
assertThat(
convert(
new ClientLinkRemotelyClosedException("", errorCondition(ERROR_RESOURCE_DELETED))))
.isInstanceOf(AmqpException.AmqpEntityDoesNotExistException.class);
assertThat(convert(new ClientLinkRemotelyClosedException("")))
.isInstanceOf(AmqpException.AmqpResourceClosedException.class);
.isInstanceOf(AmqpResourceClosedException.class);
assertThat(convert(new ClientConnectionRemotelyClosedException("connection reset")))
.isInstanceOf(AmqpException.AmqpConnectionException.class);
.isInstanceOf(AmqpConnectionException.class);
assertThat(convert(new ClientConnectionRemotelyClosedException("connection refused")))
.isInstanceOf(AmqpException.AmqpConnectionException.class);
.isInstanceOf(AmqpConnectionException.class);
assertThat(convert(new ClientConnectionRemotelyClosedException("connection forced")))
.isInstanceOf(AmqpException.AmqpConnectionException.class);
.isInstanceOf(AmqpConnectionException.class);
assertThat(convert(new ClientConnectionRemotelyClosedException("", new RuntimeException())))
.isInstanceOf(AmqpException.AmqpConnectionException.class)
.isInstanceOf(AmqpConnectionException.class)
.hasCauseInstanceOf(ClientConnectionRemotelyClosedException.class);
assertThat(convert(new ClientConnectionRemotelyClosedException("", new SSLException(""))))
.isInstanceOf(AmqpException.AmqpSecurityException.class)
Expand All @@ -74,6 +76,17 @@ void convertTest() {
.hasCauseInstanceOf(ClientLinkRemotelyClosedException.class);
}

@Test
void testNoRunningStreamMemberOnNode() {
assertThat(
noRunningStreamMemberOnNode(
new AmqpResourceClosedException(
"stream queue 'stream-RecoveryClusterTest_clusterRestart-a69d-db752afee52a' in vhost '/' does not have a running replica on the local node [condition = amqp:internal-error]")))
.isTrue();
assertThat(noRunningStreamMemberOnNode(new AmqpResourceClosedException("foo"))).isFalse();
assertThat(noRunningStreamMemberOnNode(new AmqpConnectionException("foo", null))).isFalse();
}

ErrorCondition errorCondition(String condition) {
return ErrorCondition.create(condition, null);
}
Expand Down

0 comments on commit 5680cde

Please sign in to comment.