Skip to content

Commit

Permalink
Merge pull request #261 from EventStore/yoeight/db-622-fix-cluster-discovery-process-in-the-java-client
Browse files Browse the repository at this point in the history

Fix cluster discovery process.
  • Loading branch information
YoEight authored Jan 30, 2024
2 parents 4cd724d + cdb7ac9 commit daafa3b
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 76 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ public void createChannel(UUID previousId, InetSocketAddress candidate) {

if (this.loadServerFeatures()) {
this.channelId = UUID.randomUUID();
this.connection.confirmChannel();
logger.info("Connection to endpoint [{}] created successfully", this.connection.getLastConnectedEndpoint());
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ class ConnectionState {
private InetSocketAddress previous;
private ManagedChannel currentChannel;

// Indicates whether the current channel has passed all the connection prerequisites needed before the user can use it.
// A non-exhaustive list of those prerequisites:
// * Whether we managed to get a gossip seed from the channel
// * Whether we managed to read the server features (a not-found error here is not fatal; it just means the node runs an old version)
private boolean confirmedChannel;

ConnectionState(EventStoreDBClientSettings settings) {
this.settings = settings;

Expand All @@ -46,7 +52,11 @@ class ConnectionState {
}

InetSocketAddress getLastConnectedEndpoint() {
return this.previous;
return this.confirmedChannel ? this.previous : null;
}

/**
 * Marks the current channel as confirmed by setting the {@code confirmedChannel} flag to {@code true}.
 */
void confirmChannel() {
this.confirmedChannel = true;
}

ManagedChannel getCurrentChannel() {
Expand Down Expand Up @@ -82,6 +92,7 @@ void connect(InetSocketAddress addr) {
builder.keepAliveTime(settings.getKeepAliveInterval(), TimeUnit.MILLISECONDS);

this.currentChannel = builder.build();
this.confirmedChannel = false;
this.previous = addr;
}

Expand All @@ -108,5 +119,6 @@ public void shutdown() {

/**
 * Resets the connection bookkeeping: forgets the previously recorded address
 * and marks the channel as not confirmed.
 */
public void clear() {
this.previous = null;
this.confirmedChannel = false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class EventStoreDBClientBase {
EventStoreDBClientBase(EventStoreDBClientSettings settings) {
Discovery discovery;

if (settings.getHosts().length == 1) {
if (settings.getHosts().length == 1 && !settings.isDnsDiscover()) {
discovery = new SingleNodeDiscovery(settings.getHosts()[0]);
} else {
discovery = new ClusterDiscovery(settings);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,20 @@ public synchronized EventStoreDBClient getDefaultClient(Database database) {
continue;
}

// On rare occasions, GHA can take much longer than usual to set up a cluster
// through docker compose. In that case, we create a fresh client when we have exhausted
// all discovery attempts and the connection got closed.
if (e.getCause() instanceof ConnectionShutdownException && (settings.isDnsDiscover() || settings.getHosts().length > 1)) {
logger.debug("Seems we exhausted all discovery attempts. Unusual but maybe docker is slow");
try {
Thread.sleep(500);
} catch (InterruptedException ex) {
throw new RuntimeException(ex);
}
defaultClient = EventStoreDBClient.create(settings);
continue;
}

throw new RuntimeException(e);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ public ExternallyCreatedCluster(boolean secure) {
public ConnectionSettingsBuilder defaultSettingsBuilder() {
return EventStoreDBClientSettings
.builder()
.dnsDiscover(true)
.defaultCredentials("admin", "changeit")
.addHost("localhost", 2_111)
.addHost("localhost", 2_112)
.addHost("localhost", 2_113)
.tls(secure)
.tlsVerifyCert(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import java.util.concurrent.ExecutionException;

public interface DeadlineTests extends ConnectionAware {
@Test
@RetryingTest(10)
default void testDefaultDeadline() throws Throwable {
EventStoreDBClient client = getDatabase().connectWith(opts ->
opts.defaultDeadline(1)
Expand Down
128 changes: 60 additions & 68 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,106 +1,98 @@
version: '3.5'
version: "3.5"

services:
volumes-provisioner:
image: "hasnat/volumes-provisioner"
image: hasnat/volumes-provisioner
environment:
PROVISION_DIRECTORIES: "1000:1000:0755:/tmp/certs"
volumes:
- "./certs:/tmp/certs"
network_mode: "none"
network_mode: none

setup:
cert-gen:
image: eventstore/es-gencert-cli:1.0.2
entrypoint: bash
user: "1000:1000"
command: >
-c "mkdir -p ./certs && cd /certs
&& es-gencert-cli create-ca
&& es-gencert-cli create-node -out ./node1 --dns-names node1.eventstore
&& es-gencert-cli create-node -out ./node2 --dns-names node2.eventstore
&& es-gencert-cli create-node -out ./node3 --dns-names node3.eventstore
&& es-gencert-cli create-node -out ./node1 -ip-addresses 127.0.0.1,172.30.240.11 -dns-names localhost
&& es-gencert-cli create-node -out ./node2 -ip-addresses 127.0.0.1,172.30.240.12 -dns-names localhost
&& es-gencert-cli create-node -out ./node3 -ip-addresses 127.0.0.1,172.30.240.13 -dns-names localhost
&& find . -type f -print0 | xargs -0 chmod 666"
container_name: setup
volumes:
- ./certs:/certs
- "./certs:/certs"
depends_on:
- volumes-provisioner

node1.eventstore: &template
esdb-node1:
image: ghcr.io/eventstore/eventstore:${CONTAINER_IMAGE_VERSION:-latest}
container_name: node1.eventstore
env_file:
- vars.env
environment:
- EVENTSTORE_EXT_HOST_ADVERTISE_AS=node1.eventstore
- EVENTSTORE_INT_HOST_ADVERTISE_AS=node1.eventstore
- EVENTSTORE_GOSSIP_SEED=node2.eventstore:2113,node3.eventstore:2113
- EVENTSTORE_CERTIFICATE_FILE=/certs/node1/node.crt
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/certs/node1/node.key
- EVENTSTORE_GOSSIP_SEED=172.30.240.12:2113,172.30.240.13:2113
- EVENTSTORE_INT_IP=172.30.240.11
- EVENTSTORE_CERTIFICATE_FILE=/etc/eventstore/certs/node1/node.crt
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/etc/eventstore/certs/node1/node.key
- EVENTSTORE_ADVERTISE_HTTP_PORT_TO_CLIENT_AS=2111
- EVENTSTORE_ADVERTISE_TCP_PORT_TO_CLIENT_AS=1111
healthcheck:
test:
[
'CMD-SHELL',
'curl --fail --insecure https://node1.eventstore:2113/health/live || exit 1',
]
interval: 5s
timeout: 5s
retries: 24
ports:
- 1111:1113
- 2111:2113
networks:
clusternetwork:
ipv4_address: 172.30.240.11
volumes:
- ./certs:/certs
- ./certs:/etc/eventstore/certs
restart: unless-stopped
depends_on:
- setup
restart: always
- cert-gen

node2.eventstore:
<<: *template
container_name: node2.eventstore
esdb-node2:
image: ghcr.io/eventstore/eventstore:${CONTAINER_IMAGE_VERSION:-latest}
env_file:
- vars.env
environment:
- EVENTSTORE_EXT_HOST_ADVERTISE_AS=node2.eventstore
- EVENTSTORE_INT_HOST_ADVERTISE_AS=node2.eventstore
- EVENTSTORE_GOSSIP_SEED=node1.eventstore:2113,node3.eventstore:2113
- EVENTSTORE_CERTIFICATE_FILE=/certs/node2/node.crt
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/certs/node2/node.key
- EVENTSTORE_GOSSIP_SEED=172.30.240.11:2113,172.30.240.13:2113
- EVENTSTORE_INT_IP=172.30.240.12
- EVENTSTORE_CERTIFICATE_FILE=/etc/eventstore/certs/node2/node.crt
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/etc/eventstore/certs/node2/node.key
- EVENTSTORE_ADVERTISE_HTTP_PORT_TO_CLIENT_AS=2112
- EVENTSTORE_ADVERTISE_TCP_PORT_TO_CLIENT_AS=1112
healthcheck:
test:
[
'CMD-SHELL',
'curl --fail --insecure https://node2.eventstore:2113/health/live || exit 1',
]
interval: 5s
timeout: 5s
retries: 24
ports:
- 1112:1113
- 2112:2113
networks:
clusternetwork:
ipv4_address: 172.30.240.12
volumes:
- ./certs:/etc/eventstore/certs
restart: unless-stopped
depends_on:
- cert-gen

node3.eventstore:
<<: *template
container_name: node3.eventstore
esdb-node3:
image: ghcr.io/eventstore/eventstore:${CONTAINER_IMAGE_VERSION:-latest}
env_file:
- vars.env
environment:
- EVENTSTORE_EXT_HOST_ADVERTISE_AS=node3.eventstore
- EVENTSTORE_INT_HOST_ADVERTISE_AS=node3.eventstore
- EVENTSTORE_GOSSIP_SEED=node1.eventstore:2113,node2.eventstore:2113
- EVENTSTORE_CERTIFICATE_FILE=/certs/node3/node.crt
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/certs/node3/node.key
- EVENTSTORE_GOSSIP_SEED=172.30.240.11:2113,172.30.240.12:2113
- EVENTSTORE_INT_IP=172.30.240.13
- EVENTSTORE_CERTIFICATE_FILE=/etc/eventstore/certs/node3/node.crt
- EVENTSTORE_CERTIFICATE_PRIVATE_KEY_FILE=/etc/eventstore/certs/node3/node.key
- EVENTSTORE_ADVERTISE_HTTP_PORT_TO_CLIENT_AS=2113
- EVENTSTORE_ADVERTISE_TCP_PORT_TO_CLIENT_AS=1113
healthcheck:
test:
[
'CMD-SHELL',
'curl --fail --insecure https://node3.eventstore:2113/health/live || exit 1',
]
interval: 5s
timeout: 5s
retries: 24
ports:
- 1113:1113
- 2113:2113
networks:
clusternetwork:
ipv4_address: 172.30.240.13
volumes:
- ./certs:/etc/eventstore/certs
restart: unless-stopped
depends_on:
- cert-gen

networks:
clusternetwork:
name: eventstoredb.local
driver: bridge
ipam:
driver: default
config:
- subnet: 172.30.240.0/24
7 changes: 4 additions & 3 deletions vars.env
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
EVENTSTORE_CLUSTER_SIZE=3
EVENTSTORE_RUN_PROJECTIONS=All
EVENTSTORE_TRUSTED_ROOT_CERTIFICATES_PATH=/certs/ca
EVENTSTORE_INT_TCP_PORT=1112
EVENTSTORE_HTTP_PORT=2113
EVENTSTORE_TRUSTED_ROOT_CERTIFICATES_PATH=/etc/eventstore/certs/ca
EVENTSTORE_DISCOVER_VIA_DNS=false
EVENTSTORE_ENABLE_EXTERNAL_TCP=true
EVENTSTORE_ENABLE_ATOM_PUB_OVER_HTTP=true
EVENTSTORE_ADVERTISE_HOST_TO_CLIENT_AS=localhost
EVENTSTORE_ADVERTISE_HOST_TO_CLIENT_AS=localhost

0 comments on commit daafa3b

Please sign in to comment.