-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[improve] [broker] Not close the socket if lookup failed caused by bundle unloading or metadata ex #21211
[improve] [broker] Not close the socket if lookup failed caused by bundle unloading or metadata ex #21211
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,7 +26,6 @@ | |
import java.net.URISyntaxException; | ||
import java.util.Optional; | ||
import java.util.concurrent.CompletableFuture; | ||
import java.util.concurrent.CompletionException; | ||
import javax.ws.rs.Encoded; | ||
import javax.ws.rs.WebApplicationException; | ||
import javax.ws.rs.core.Response; | ||
|
@@ -48,6 +47,7 @@ | |
import org.apache.pulsar.common.policies.data.TopicOperation; | ||
import org.apache.pulsar.common.util.Codec; | ||
import org.apache.pulsar.common.util.FutureUtil; | ||
import org.apache.pulsar.metadata.api.MetadataStoreException; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
|
@@ -318,35 +318,37 @@ public static CompletableFuture<ByteBuf> lookupTopicAsync(PulsarService pulsarSe | |
requestId, shouldRedirectThroughServiceUrl(conf, lookupData))); | ||
} | ||
}).exceptionally(ex -> { | ||
if (ex instanceof CompletionException && ex.getCause() instanceof IllegalStateException) { | ||
log.info("Failed to lookup {} for topic {} with error {}", clientAppId, | ||
topicName.toString(), ex.getCause().getMessage()); | ||
} else { | ||
log.warn("Failed to lookup {} for topic {} with error {}", clientAppId, | ||
topicName.toString(), ex.getMessage(), ex); | ||
} | ||
lookupfuture.complete( | ||
newLookupErrorResponse(ServerError.ServiceNotReady, ex.getMessage(), requestId)); | ||
return null; | ||
}); | ||
handleLookupError(lookupfuture, topicName.toString(), clientAppId, requestId, ex); | ||
return null; | ||
}); | ||
} | ||
|
||
}).exceptionally(ex -> { | ||
if (ex instanceof CompletionException && ex.getCause() instanceof IllegalStateException) { | ||
log.info("Failed to lookup {} for topic {} with error {}", clientAppId, topicName.toString(), | ||
ex.getCause().getMessage()); | ||
} else { | ||
log.warn("Failed to lookup {} for topic {} with error {}", clientAppId, topicName.toString(), | ||
ex.getMessage(), ex); | ||
} | ||
|
||
lookupfuture.complete(newLookupErrorResponse(ServerError.ServiceNotReady, ex.getMessage(), requestId)); | ||
handleLookupError(lookupfuture, topicName.toString(), clientAppId, requestId, ex); | ||
return null; | ||
}); | ||
|
||
return lookupfuture; | ||
} | ||
|
||
private static void handleLookupError(CompletableFuture<ByteBuf> lookupFuture, String topicName, String clientAppId, | ||
long requestId, Throwable ex){ | ||
final Throwable unwrapEx = FutureUtil.unwrapCompletionException(ex); | ||
final String errorMsg = unwrapEx.getMessage(); | ||
if (unwrapEx instanceof IllegalStateException) { | ||
// Current broker still hold the bundle's lock, but the bundle is being unloading. | ||
log.info("Failed to lookup {} for topic {} with error {}", clientAppId, topicName, errorMsg); | ||
lookupFuture.complete(newLookupErrorResponse(ServerError.MetadataError, errorMsg, requestId)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There may be a side effect here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The previous producer will finally receive a
The partition's producer will try to reconnect according to I also improve the test to ensure the producer and consumer are still working. See Maybe I misunderstood what you meant, could you explain the details? |
||
} else if (unwrapEx instanceof MetadataStoreException){ | ||
// Load bundle ownership or acquire lock failed. | ||
// Differ with "IllegalStateException", print warning log. | ||
log.warn("Failed to lookup {} for topic {} with error {}", clientAppId, topicName, errorMsg); | ||
lookupFuture.complete(newLookupErrorResponse(ServerError.MetadataError, errorMsg, requestId)); | ||
} else { | ||
log.warn("Failed to lookup {} for topic {} with error {}", clientAppId, topicName, errorMsg); | ||
lookupFuture.complete(newLookupErrorResponse(ServerError.ServiceNotReady, errorMsg, requestId)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we need to return why not There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to guarantee that uncontrollable errors continue the previous behavior |
||
} | ||
} | ||
|
||
protected TopicName getTopicName(String topicDomain, String tenant, String cluster, String namespace, | ||
@Encoded String encodedTopic) { | ||
String decodedName = Codec.decode(encodedTopic); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
how do we know
IllegalStateException
is always MetadataError
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the current case, the
IllegalStateException
is only thrown when the namespace bundle is unloading. See https://github.com/apache/pulsar/blob/master/pulsar-broker/src/main/java/org/apache/pulsar/broker/namespace/NamespaceService.java#L453-L455C36
I agree with you, we should clearly define this exception. However, since there are so many places that rely on the method
NamespaceService.findBrokerServiceUrl
, such as PulsarWebResource.validateTopicOwnershipAsync, we need a separate PR to focus on it.