-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
dns: add dns resolver implementation based on Apple APIs #13074
Changes from 8 commits
6b72142
afb5731
3f6adce
8e3e44e
204f3dd
b5ecbdb
6fb5a14
3ecd18a
1c972cc
d90ab49
09c8946
875458a
44a1ce6
25cc48f
2797de8
1001842
5c3ab10
eb85d38
5f79bd3
6ff5a29
8374eb6
3fd1cc9
a5477a9
c1facb7
04e2861
b220407
7167cb6
8062ad0
29749aa
2bada07
896d88a
2332f23
7516419
ddf8456
41ba439
72cc832
9286227
c3bbdd6
9a6120d
4491132
2d04495
b533f2c
4800d0d
fd2521f
6737b8f
6f14724
40b40e4
80fb3f7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,315 @@ | ||
#include "common/network/apple_dns_impl.h" | ||
|
||
#include <dns_sd.h> | ||
|
||
#include <chrono> | ||
#include <cstdint> | ||
#include <list> | ||
#include <memory> | ||
#include <string> | ||
|
||
#include "envoy/common/platform.h" | ||
|
||
#include "common/common/assert.h" | ||
#include "common/common/fmt.h" | ||
#include "common/network/address_impl.h" | ||
#include "common/network/utility.h" | ||
|
||
#include "absl/strings/str_join.h" | ||
|
||
namespace Envoy { | ||
namespace Network { | ||
|
||
AppleDnsResolverImpl::AppleDnsResolverImpl(Event::Dispatcher& dispatcher) | ||
: dispatcher_(dispatcher) { | ||
ENVOY_LOG(warn, "Constructing DNS resolver"); | ||
junr03 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
initializeMainSdRef(); | ||
} | ||
|
||
AppleDnsResolverImpl::~AppleDnsResolverImpl() { | ||
ENVOY_LOG(warn, "Destructing DNS resolver"); | ||
deallocateMainSdRef(); | ||
} | ||
|
||
void AppleDnsResolverImpl::deallocateMainSdRef() { | ||
ENVOY_LOG(warn, "DNSServiceRefDeallocate main sd ref"); | ||
// dns_sd.h says: | ||
// If the reference's underlying socket is used in a run loop or select() call, it should | ||
// be removed BEFORE DNSServiceRefDeallocate() is called, as this function closes the | ||
// reference's socket. | ||
sd_ref_event_.reset(); | ||
DNSServiceRefDeallocate(main_sd_ref_); | ||
} | ||
|
||
void AppleDnsResolverImpl::initializeMainSdRef() { | ||
// This implementation uses a shared connection for two main reasons: | ||
// 1. Efficiency of concurrent resolutions by sharing the same underlying UDS to the DNS | ||
// daemon. | ||
// 2. An error on a connection to the daemon is good indication that other connection, | ||
// even if not shared, would not succeed. So it is better to share one connection and | ||
// promptly cancel all outstanding queries, rather than individually wait for all | ||
// connections to error out. | ||
// However, using a shared connection brings some complexities detailed in the inline comments | ||
// for kDNSServiceFlagsShareConnection in dns_sd.h, and copied (and edited) in this implementation | ||
// where relevant. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't feel strongly about it, but is this a premature optimization? Should we just do a connection per resolution for now and keep it simple? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I ultimately decided to leave it. I think the reasons in the inline comment are strong enough to leave it. Mostly around "failing fast" in mobile environments were instead of potentially waiting for independent timeouts that may stagger, all outstanding queries will fail. |
||
auto error = DNSServiceCreateConnection(&main_sd_ref_); | ||
RELEASE_ASSERT(!error, "error in DNSServiceCreateConnection"); | ||
|
||
auto fd = DNSServiceRefSockFD(main_sd_ref_); | ||
RELEASE_ASSERT(fd != -1, "error in DNSServiceRefSockFD"); | ||
ENVOY_LOG(warn, "DNS resolver has fd={}", fd); | ||
|
||
sd_ref_event_ = dispatcher_.createFileEvent( | ||
fd, [this](uint32_t events) { onEventCallback(events); }, Event::FileTriggerType::Level, | ||
junr03 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Event::FileReadyType::Read); | ||
sd_ref_event_->setEnabled(Event::FileReadyType::Read); | ||
} | ||
|
||
void AppleDnsResolverImpl::onEventCallback(uint32_t events) { | ||
ENVOY_LOG(warn, "DNS resolver file event"); | ||
ASSERT(events & Event::FileReadyType::Read); | ||
DNSServiceProcessResult(main_sd_ref_); | ||
} | ||
|
||
ActiveDnsQuery* AppleDnsResolverImpl::resolve(const std::string& dns_name, | ||
DnsLookupFamily dns_lookup_family, | ||
ResolveCb callback) { | ||
ENVOY_LOG(warn, "DNS resolver resolve={}", dns_name); | ||
std::unique_ptr<PendingResolution> pending_resolution( | ||
new PendingResolution(*this, callback, dispatcher_, main_sd_ref_, dns_name)); | ||
|
||
DNSServiceErrorType error = pending_resolution->dnsServiceGetAddrInfo(dns_lookup_family); | ||
if (error != kDNSServiceErr_NoError) { | ||
ENVOY_LOG(error, "DNS resolver error in dnsServiceGetAddrInfo for {}", dns_name); | ||
junr03 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return nullptr; | ||
junr03 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
// If the query was synchronously resolved, there is no need to return the query. | ||
if (pending_resolution->synchronously_completed_) { | ||
return nullptr; | ||
} | ||
|
||
pending_resolution->owned_ = true; | ||
return pending_resolution.release(); | ||
} | ||
|
||
void AppleDnsResolverImpl::addPendingQuery(PendingResolution* query) { | ||
ASSERT(queries_with_pending_cb_.count(query) == 0); | ||
queries_with_pending_cb_.insert(query); | ||
} | ||
|
||
void AppleDnsResolverImpl::flushPendingQueries() { | ||
ENVOY_LOG(warn, "DNS Resolver flushing {} queries", queries_with_pending_cb_.size()); | ||
for (std::set<PendingResolution*>::iterator it = queries_with_pending_cb_.begin(); | ||
it != queries_with_pending_cb_.end(); ++it) { | ||
auto query = *it; | ||
try { | ||
junr03 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ASSERT(query->pending_cb_); | ||
query->callback_(query->pending_cb_->status_, std::move(query->pending_cb_->responses_)); | ||
} catch (const EnvoyException& e) { | ||
ENVOY_LOG(critical, "EnvoyException in DNSService callback: {}", e.what()); | ||
junr03 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
query->dispatcher_.post([s = std::string(e.what())] { throw EnvoyException(s); }); | ||
junr03 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} catch (const std::exception& e) { | ||
ENVOY_LOG(critical, "std::exception in DNSService callback: {}", e.what()); | ||
query->dispatcher_.post([s = std::string(e.what())] { throw EnvoyException(s); }); | ||
} catch (...) { | ||
ENVOY_LOG(critical, "Unknown exception in DNSService callback"); | ||
query->dispatcher_.post([] { throw EnvoyException("unknown"); }); | ||
} | ||
|
||
if (query->owned_) { | ||
ENVOY_LOG(warn, "Resolution for {} completed (async)", query->dns_name_); | ||
delete *it; | ||
} else { | ||
ENVOY_LOG(warn, "Resolution for {} completed (synchronously)", query->dns_name_); | ||
query->synchronously_completed_ = true; | ||
} | ||
} | ||
|
||
// Purge the contents so no one tries to delete them again. | ||
queries_with_pending_cb_.clear(); | ||
junr03 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
AppleDnsResolverImpl::PendingResolution::~PendingResolution() { | ||
ENVOY_LOG(warn, "Destroying PendingResolution for {}", dns_name_); | ||
DNSServiceRefDeallocate(individual_sd_ref_); | ||
} | ||
|
||
void AppleDnsResolverImpl::PendingResolution::cancel() { | ||
ENVOY_LOG(warn, "Cancelling PendingResolution for {}", dns_name_); | ||
ASSERT(owned_); | ||
if (pending_cb_) { | ||
/* (taken and edited from dns_sd.h) | ||
* Canceling operations and kDNSServiceFlagsMoreComing | ||
* Whenever you cancel any operation for which you had deferred [resolution] | ||
* because of a kDNSServiceFlagsMoreComing flag, you should [flush]. This is because, after | ||
* cancelling the operation, you can no longer wait for a callback *without* MoreComing set, to | ||
* tell you [to flush] (the operation has been canceled, so there will be no more callbacks). | ||
* | ||
* [FURTHER] An implication of the collective | ||
* kDNSServiceFlagsMoreComing flag for shared connections is that this | ||
* guideline applies more broadly -- any time you cancel an operation on | ||
* a shared connection, you should perform all deferred updates for all | ||
* operations sharing that connection. This is because the MoreComing flag | ||
* might have been referring to events coming for the operation you canceled, | ||
* which will now not be coming because the operation has been canceled. | ||
*/ | ||
parent_.flushPendingQueries(); | ||
} else { | ||
delete this; | ||
} | ||
} | ||
|
||
void AppleDnsResolverImpl::PendingResolution::onDNSServiceGetAddrInfoReply( | ||
DNSServiceFlags flags, uint32_t interface_index, DNSServiceErrorType error_code, | ||
const char* hostname, const struct sockaddr* address, uint32_t ttl) { | ||
ENVOY_LOG(warn, | ||
"DNS for {} resolved with: flags={}[MoreComing={}, Add={}], interface_index={}, " | ||
"error_code={}, hostname={}", | ||
dns_name_, flags, flags & kDNSServiceFlagsMoreComing ? "yes" : "no", | ||
flags & kDNSServiceFlagsAdd ? "yes" : "no", interface_index, error_code, hostname); | ||
ASSERT(interface_index == 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How certain are you of some of these ASSERTs on state that comes from the NS libs? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As certain as I can be from what they write in the comments in dns_sd.h 😅 |
||
|
||
if (error_code == kDNSServiceErr_DefunctConnection || error_code == kDNSServiceErr_Timeout || | ||
error_code == kDNSServiceErr_Refused) { | ||
// Current query gets a failure status | ||
if (!pending_cb_) { | ||
ENVOY_LOG(warn, "[Error path] Adding to queries pending callback"); | ||
pending_cb_ = {ResolutionStatus::Failure, {}}; | ||
parent_.addPendingQuery(this); | ||
} else { | ||
ENVOY_LOG(warn, "[Error path] Changing status for query already pending flush"); | ||
pending_cb_->status_ = ResolutionStatus::Failure; | ||
} | ||
|
||
ENVOY_LOG(warn, "[Error path] DNS Resolver flushing queries pending callback"); | ||
parent_.flushPendingQueries(); | ||
|
||
// The main sd ref if destroyed here because a callback with an error is good indication that | ||
// the connection to the DNS deamon is faulty and needs to be torn down. | ||
// | ||
// Deallocation of the MainSdRef __has__ to happend __after__ flushing queries. Flushing queries | ||
// deallocates individual refs, so deallocating the main ref aheadd would cause deallocation of | ||
// invalid individual refs per dns_sd.h | ||
parent_.deallocateMainSdRef(); | ||
parent_.initializeMainSdRef(); | ||
|
||
return; | ||
} | ||
|
||
// At this point all known non-success cases have been dealt with. Assert that the error code is | ||
// no error. | ||
// POINT FOR DISCUSSION: Crash otherwise to alert consumers that there are unadressed edge cases? | ||
// stat? | ||
RELEASE_ASSERT(error_code == kDNSServiceErr_NoError, | ||
junr03 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"An unknown error has been returned by DNSServiceGetAddrInfoReply"); | ||
|
||
// Only add this address to the list if kDNSServiceFlagsAdd is set. Callback targets are purely | ||
// additive. | ||
if (flags & kDNSServiceFlagsAdd) { | ||
auto dns_response = buildDnsResponse(address, ttl); | ||
ENVOY_LOG(warn, "Address to add address={}, ttl={}", | ||
dns_response.address_->ip()->addressAsString(), ttl); | ||
|
||
if (!pending_cb_) { | ||
ENVOY_LOG(warn, "Adding to queries pending callback"); | ||
pending_cb_ = {ResolutionStatus::Success, {dns_response}}; | ||
parent_.addPendingQuery(this); | ||
} else { | ||
ENVOY_LOG(warn, "New address for query already pending flush"); | ||
pending_cb_->responses_.push_back(dns_response); | ||
} | ||
} | ||
|
||
if (!(flags & kDNSServiceFlagsMoreComing)) { | ||
/* (taken and edited from dns_sd.h) | ||
* Collective kDNSServiceFlagsMoreComing flag: | ||
* When [DNSServiceGetAddrInfoReply] are invoked using a shared DNSServiceRef, the | ||
* kDNSServiceFlagsMoreComing flag applies collectively to *all* active | ||
* operations sharing the same [main_sd_ref]. If the MoreComing flag is | ||
* set it means that there are more results queued on this parent DNSServiceRef, | ||
* but not necessarily more results for this particular callback function. | ||
* The implication of this for client programmers is that when a callback | ||
* is invoked with the MoreComing flag set, the code should update its | ||
* internal data structures with the new result (as is done above when calling | ||
* parent_.addPendingQuery(this))...Then, later when a callback is eventually invoked with the | ||
* MoreComing flag not set, the code should update *all* [pending queries] related to that | ||
* shared parent DNSServiceRef that need updating (i.e that have had DNSServiceGetAddrInfoReply | ||
* called on them since the last flush), not just the [queries] related to the particular | ||
* callback that happened to be the last one to be invoked. | ||
*/ | ||
ENVOY_LOG(warn, "DNS Resolver flushing queries pending callback"); | ||
parent_.flushPendingQueries(); | ||
} | ||
} | ||
|
||
DNSServiceErrorType | ||
AppleDnsResolverImpl::PendingResolution::dnsServiceGetAddrInfo(DnsLookupFamily dns_lookup_family) { | ||
DNSServiceProtocol protocol; | ||
switch (dns_lookup_family) { | ||
case DnsLookupFamily::V4Only: | ||
protocol = kDNSServiceProtocol_IPv4; | ||
break; | ||
case DnsLookupFamily::V6Only: | ||
protocol = kDNSServiceProtocol_IPv6; | ||
break; | ||
case DnsLookupFamily::Auto: | ||
protocol = kDNSServiceProtocol_IPv4 | kDNSServiceProtocol_IPv6; | ||
break; | ||
default: | ||
NOT_REACHED_GCOVR_EXCL_LINE; | ||
junr03 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
// TODO: explore caching: there are caching flags in the dns_sd.h flags, allow expired answers | ||
// from the cache? | ||
// TODO: explore validation via DNSSEC? | ||
return DNSServiceGetAddrInfo( | ||
&individual_sd_ref_, kDNSServiceFlagsShareConnection | kDNSServiceFlagsTimeout, 0, protocol, | ||
dns_name_.c_str(), | ||
/* | ||
* About Thread Safety (taken from inline documentation there): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd push this to the top of the file. It's one of the most important thing to have in mind when reading the code. |
||
* The dns_sd.h API does not presuppose any particular threading model, and consequently | ||
* does no locking internally (which would require linking with a specific threading library). | ||
* If the client concurrently, from multiple threads (or contexts), calls API routines using | ||
* the same DNSServiceRef, it is the client's responsibility to provide mutual exclusion for | ||
* that DNSServiceRef. | ||
*/ | ||
|
||
// Therefore, much like the c-ares implementation All calls and callbacks to the API need to | ||
// happen on the thread that owns the creating dispatcher. This is the case as callbacks are | ||
// driven by processing bytes in onEventCallback which run on the passed in dispatcher's event | ||
// loop. | ||
junr03 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
[](DNSServiceRef, DNSServiceFlags flags, uint32_t interface_index, | ||
DNSServiceErrorType error_code, const char* hostname, const struct sockaddr* address, | ||
uint32_t ttl, void* context) { | ||
static_cast<PendingResolution*>(context)->onDNSServiceGetAddrInfoReply( | ||
junr03 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
flags, interface_index, error_code, hostname, address, ttl); | ||
}, | ||
this); | ||
} | ||
|
||
DnsResponse | ||
AppleDnsResolverImpl::PendingResolution::buildDnsResponse(const struct sockaddr* address, | ||
uint32_t ttl) { | ||
switch (address->sa_family) { | ||
case AF_INET: | ||
sockaddr_in address_in; | ||
memset(&address_in, 0, sizeof(address_in)); | ||
address_in.sin_family = AF_INET; | ||
address_in.sin_port = 0; | ||
address_in.sin_addr = reinterpret_cast<const sockaddr_in*>(address)->sin_addr; | ||
return {std::make_shared<const Address::Ipv4Instance>(&address_in), std::chrono::seconds(ttl)}; | ||
case AF_INET6: | ||
sockaddr_in6 address_in6; | ||
memset(&address_in6, 0, sizeof(address_in6)); | ||
address_in6.sin6_family = AF_INET6; | ||
address_in6.sin6_port = 0; | ||
address_in6.sin6_addr = reinterpret_cast<const sockaddr_in6*>(address)->sin6_addr; | ||
return {std::make_shared<const Address::Ipv6Instance>(address_in6), std::chrono::seconds(ttl)}; | ||
default: | ||
NOT_REACHED_GCOVR_EXCL_LINE; | ||
} | ||
} | ||
|
||
} // namespace Network | ||
} // namespace Envoy |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, just got around to looking into the details of this PR now. One question I have; what is the threading model here? Who controls the callbacks, which threads do they land on? Maybe a top-level comment explaining this would help. Thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://github.com/envoyproxy/envoy/pull/13074/files#diff-4ebdef3732b2b07e175aba41bcaf62abR280