Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

failover2 on connect #1133

Merged
merged 1 commit into from
Oct 3, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import software.amazon.jdbc.plugin.staledns.AuroraStaleDnsHelper;
import software.amazon.jdbc.targetdriverdialect.TargetDriverDialect;
import software.amazon.jdbc.util.Messages;
import software.amazon.jdbc.util.PropertyUtils;
import software.amazon.jdbc.util.RdsUrlType;
import software.amazon.jdbc.util.RdsUtils;
import software.amazon.jdbc.util.SqlState;
Expand All @@ -65,6 +66,8 @@ public class FailoverConnectionPlugin extends AbstractConnectionPlugin {
private static final String TELEMETRY_WRITER_FAILOVER = "failover to writer node";
private static final String TELEMETRY_READER_FAILOVER = "failover to replica";

private static final String INTERNAL_CONNECT_PROPERTY_NAME = "76c06979-49c4-4c86-9600-a63605b83f50";

public static final AwsWrapperProperty FAILOVER_TIMEOUT_MS =
new AwsWrapperProperty(
"failoverTimeoutMs",
Expand All @@ -87,10 +90,18 @@ public class FailoverConnectionPlugin extends AbstractConnectionPlugin {
"random",
"The strategy that should be used to select a new reader host while opening a new connection.");

public static final AwsWrapperProperty ENABLE_CONNECT_FAILOVER =
new AwsWrapperProperty(
"enableConnectFailover", "false",
"Enable/disable cluster-aware failover if the initial connection to the database fails due to a "
+ "network exception. Note that this may result in a connection to a different instance in the cluster "
+ "than was specified by the URL.");

private static final Set<String> subscribedMethods =
Collections.unmodifiableSet(new HashSet<String>() {
{
addAll(SubscribedMethodHelper.NETWORK_BOUND_METHODS);
add("connect");
add("initHostProvider");
}
});
Expand Down Expand Up @@ -317,7 +328,6 @@ protected <E extends Exception> void dealWithIllegalStateException(
* @throws SQLException if an error occurs
*/
protected void failover(final HostSpec failedHost) throws SQLException {
this.pluginService.setAvailability(failedHost.asAliases(), HostAvailability.NOT_AVAILABLE);
aaron-congo marked this conversation as resolved.
Show resolved Hide resolved

if (this.failoverMode == FailoverMode.STRICT_WRITER) {
failoverWriter();
Expand Down Expand Up @@ -364,6 +374,9 @@ protected void failoverReader() throws SQLException {
throw new FailoverFailedSQLException(Messages.get("Failover.unableToConnectToReader"));
}

final Properties copyProp = PropertyUtils.copyProperties(this.properties);
copyProp.setProperty(INTERNAL_CONNECT_PROPERTY_NAME, "true");

final List<HostSpec> hosts = this.pluginService.getHosts();
Connection readerCandidateConn = null;
HostSpec readerCandidate = null;
Expand All @@ -389,7 +402,7 @@ protected void failoverReader() throws SQLException {
}

try {
readerCandidateConn = this.pluginService.connect(readerCandidate, this.properties);
readerCandidateConn = this.pluginService.connect(readerCandidate, copyProp);
if (this.pluginService.getHostRole(readerCandidateConn) != HostRole.READER) {
readerCandidateConn.close();
readerCandidateConn = null;
Expand All @@ -412,7 +425,7 @@ protected void failoverReader() throws SQLException {
this.failoverReaderHostSelectorStrategySetting);
if (readerCandidate != null) {
try {
readerCandidateConn = this.pluginService.connect(readerCandidate, this.properties);
readerCandidateConn = this.pluginService.connect(readerCandidate, copyProp);
} catch (SQLException ex) {
readerCandidate = null;
}
Expand Down Expand Up @@ -480,6 +493,8 @@ protected void failoverWriter() throws SQLException {
}

final List<HostSpec> updatedHosts = this.pluginService.getHosts();
final Properties copyProp = PropertyUtils.copyProperties(this.properties);
copyProp.setProperty(INTERNAL_CONNECT_PROPERTY_NAME, "true");

Connection writerCandidateConn = null;
final HostSpec writerCandidate = updatedHosts.stream()
Expand All @@ -489,7 +504,7 @@ protected void failoverWriter() throws SQLException {

if (writerCandidate != null) {
try {
writerCandidateConn = this.pluginService.connect(writerCandidate, this.properties);
writerCandidateConn = this.pluginService.connect(writerCandidate, copyProp);
} catch (SQLException ex) {
// do nothing
}
Expand Down Expand Up @@ -606,4 +621,69 @@ protected boolean canDirectExecute(final String methodName) {
|| methodName.equals(METHOD_IS_CLOSED)
|| methodName.equals(METHOD_ABORT));
}

@Override
public Connection connect(
final String driverProtocol,
final HostSpec hostSpec,
final Properties props,
final boolean isInitialConnection,
final JdbcCallable<Connection, SQLException> connectFunc)
throws SQLException {

if (!ENABLE_CONNECT_FAILOVER.getBoolean(props)) {
return connectFunc.call();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we be connecting using getVerifiedConnection in this case?

Suggested change
return connectFunc.call();
return this.staleDnsHelper.getVerifiedConnection(isInitialConnection, this.hostListProviderService,
driverProtocol, hostSpec, props, connectFunc);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getVerifiedConnection() call already been made of a caller of connect() that is the same failover2 plugin. I don't think we need to execute it again.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry are you saying that getVerifiedConnection has been previously called by another plugin or previously called by failover2?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

failover2 connect() is called, then getVerifiedConnection() is called.

conn = this.staleDnsHelper.getVerifiedConnection(isInitialConnection, this.hostListProviderService,

After that, if connection isn't established, a failover starts.

And eventually it calls pluginService.connect()

writerCandidateConn = this.pluginService.connect(writerCandidate, copyProp);

that will call failover2 connect() again. This call can be passed through to the next plugin since it's already been handled earlier.

Copy link
Contributor

@aaron-congo aaron-congo Oct 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand correctly that explains why we should use connectFunc.call instead of getVerifiedConnection when the props contain the INTERNAL_CONNECT_PROPERTY_NAME property, but this particular line is covering a different scenario: it is hit when connect failover is disabled. When connect failover is disabled we will never enter the failover logic below, so as the code currently is written, getVerifiedConnection will never be called in this scenario

}

// This call was initiated by this failover2 plugin and doesn't require any additional processing.
if (props.containsKey(INTERNAL_CONNECT_PROPERTY_NAME)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it work to change this condition to the following so that we don't have to add the INTERNAL_CONNECT_PROPERTY_NAME? I think calling forceConnect probably always indicates we just want to connect directly without any special additional logic. If this works, we can probably just get rid of connectInternal and have different implementations for connect vs forceConnect.

Suggested change
if (props.containsKey(INTERNAL_CONNECT_PROPERTY_NAME)) {
if (isForceConnect)) {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

forceConnect() is for direct connection but it's not used in failover2. So forceConnect() makes not much sense to this case. Also, failover2 uses regular connect() because MonitoringHostListProvider already verified topology so no need to force direct connect. And with a regular connect(), the call comes to failover2 plugin again and our intention is to skip it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Using the INTERNAL_CONNECT_PROPERTY_NAME property as a flag seems a bit hacky/strange to me but I can't think of an alternative at the moment.

return connectFunc.call();
}

Connection conn = null;

final HostSpec hostSpecWithAvailability = this.pluginService.getHosts().stream()
.filter(x -> x.getHostAndPort().equals(hostSpec.getHostAndPort()))
.findFirst()
.orElse(null);

if (hostSpecWithAvailability == null
|| hostSpecWithAvailability.getAvailability() != HostAvailability.NOT_AVAILABLE) {

try {
conn = this.staleDnsHelper.getVerifiedConnection(isInitialConnection, this.hostListProviderService,
driverProtocol, hostSpec, props, connectFunc);
} catch (final SQLException e) {
if (!this.shouldExceptionTriggerConnectionSwitch(e)) {
throw e;
}

this.pluginService.setAvailability(hostSpec.asAliases(), HostAvailability.NOT_AVAILABLE);

try {
this.failover(hostSpec);
} catch (FailoverSuccessSQLException failoverSuccessException) {
conn = this.pluginService.getCurrentConnection();
}
}
} else {
try {
this.pluginService.refreshHostList();
this.failover(hostSpec);
} catch (FailoverSuccessSQLException failoverSuccessException) {
conn = this.pluginService.getCurrentConnection();
}
}

if (conn == null) {
// This should be unreachable, the above logic will either get a connection successfully or throw an exception.
throw new SQLException(Messages.get("Failover.unableToConnect"));
}

if (isInitialConnection) {
this.pluginService.refreshHostList(conn);
}

return conn;
}
}
Loading