Skip to content

Commit

Permalink
log an error when the agent responds with a non-success HTTP status (#…
Browse files Browse the repository at this point in the history
…260)

Currently, the AgentWriter passes all response bodies to the AgentHttpEncoder. The AgentHttpEncoder then interprets the body as a JSON object. However, when the HTTP response status does not indicate success, the agent does not return a JSON response body.

This revision logs an error whenever a non-success HTTP response status is delivered by the agent.
  • Loading branch information
dgoffredo authored Jun 2, 2023
1 parent 4c4677f commit b682d83
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 3 deletions.
2 changes: 1 addition & 1 deletion scripts/format.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -e

usage() {
argv0="$0"

cat <<END_USAGE
format.sh - format Datadog C++ source code
Expand Down
31 changes: 29 additions & 2 deletions src/agent_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,39 @@ void AgentWriter::startWriting(std::unique_ptr<Handle> handle) {
payload = trace_encoder_->payload();
trace_encoder_->clearTraces();
} // lock on mutex_ ends.
// Send spans, not in critical period.
// Send the spans. Note that this is not in the critical section above.
bool success = retryFiniteOnFail(
[&]() { return AgentWriter::postTraces(handle, headers, payload, logger_); });
// Sending could fail. If it succeeds, then the HTTP response status
// could indicate an error or success. Also, an empty response body
// indicates an error even when the status is 200.
if (success) {
trace_encoder_->handleResponse(handle->getResponse());
const int response_status = handle->getResponseStatus();
const std::string body = handle->getResponse();
if (response_status == 0) {
std::ostringstream diagnostic;
diagnostic << "Datadog Agent returned response without an HTTP status and with the "
"following body of length "
<< body.size() << ": " << body;
logger_->Log(LogLevel::error, diagnostic.str());
} else if (response_status != 200) {
std::ostringstream diagnostic;
diagnostic << "Datadog Agent returned response with unexpected HTTP status "
<< response_status << " and the following body of length " << body.size()
<< ": " << body;
logger_->Log(LogLevel::error, diagnostic.str());
} else if (body.empty()) {
logger_->Log(LogLevel::error,
"Datadog Agent returned response without a body. This tracer might be "
"sending batches of traces too frequently.");
} else {
// success
trace_encoder_->handleResponse(handle->getResponse());
}
}
// If `success == false`, then `postTraces` will have already logged
// an error.

// Let thread calling 'flush' know that we're done flushing.
{
std::unique_lock<std::mutex> lock(mutex_);
Expand Down
8 changes: 8 additions & 0 deletions src/transport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,14 @@ CURLcode CurlHandle::perform() {

std::string CurlHandle::getError() { return std::string(curl_error_buffer_); }
std::string CurlHandle::getResponse() { return response_buffer_.str(); }
int CurlHandle::getResponseStatus() {
long raw = 0;
// As of November 2003 (libcurl version 7.10.8), fetching the response code
// will not return an error.
// If there is no response code to return, then zero will be set.
(void)curl_easy_getinfo(handle_, CURLINFO_RESPONSE_CODE, &raw);
return static_cast<int>(raw);
}

} // namespace opentracing
} // namespace datadog
5 changes: 5 additions & 0 deletions src/transport.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ class Handle {
virtual CURLcode perform() = 0;
virtual std::string getError() = 0;
virtual std::string getResponse() = 0;
// Return the HTTP status of the response received, or return zero if no
// response was received or if there is no HTTP status associated with the
// response.
virtual int getResponseStatus() = 0;
};

// A Handle that uses real curl to really send things. Not thread-safe.
Expand All @@ -39,6 +43,7 @@ class CurlHandle : public Handle {
CURLcode perform() override;
std::string getError() override;
std::string getResponse() override;
int getResponseStatus() override;

private:
// For things that need cleaning up if the constructor fails as well as on destruction.
Expand Down
44 changes: 44 additions & 0 deletions test/agent_writer_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,50 @@ TEST_CASE("writer") {
REQUIRE(sampler->config == "");
}

SECTION("handle error responses") {
using Catch::Matchers::Contains;

// HTTP status zero indicates "no status."
handle->response_status = 0;
writer.write(make_trace(
{TestSpanData{"web", "service", "resource", "service.name", 1, 1, 0, 69, 420, 0}}));

writer.flush(std::chrono::seconds(10));
REQUIRE(logger->records.size() != 0);
// The logged error diagnostic will say that there was no response status.
REQUIRE_THAT(logger->records.back().message, Contains("response without an HTTP status"));

// HTTP status 200 with an empty body means that the response really should
// be 429 "too many requests," but the Agent is not configured to return
// that status and instead uses 200.
handle->response_status = 200;
writer.write(make_trace(
{TestSpanData{"web", "service", "resource", "service.name", 1, 1, 0, 69, 420, 0}}));

writer.flush(std::chrono::seconds(10));
REQUIRE(logger->records.size() != 0);
// The logged error diagnostic will mention the lack of response.
REQUIRE_THAT(logger->records.back().message, Contains("response without a body"));

// HTTP statuses other than 200 are unexpected.
std::vector<int> statuses;
for (int i = 100; i < 200; ++i) {
statuses.push_back(i);
}
for (int i = 201; i < 600; ++i) {
statuses.push_back(i);
}
auto status = GENERATE_COPY(from_range(statuses));
handle->response_status = status;
writer.write(make_trace(
{TestSpanData{"web", "service", "resource", "service.name", 1, 1, 0, 69, 420, 0}}));

writer.flush(std::chrono::seconds(10));
REQUIRE(logger->records.size() != 0);
// The logged error diagnostic will contain the response status.
REQUIRE_THAT(logger->records.back().message, Contains(" " + std::to_string(status) + " "));
}

SECTION("queue does not grow indefinitely") {
for (uint64_t i = 0; i < 30; i++) { // Only 25 actually get written.
writer.write(make_trace(
Expand Down
6 changes: 6 additions & 0 deletions test/mocks.h
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,11 @@ struct MockHandle : public Handle {
return response;
}

int getResponseStatus() override {
std::unique_lock<std::mutex> lock(mutex);
return response_status;
}

// Note, this returns any traces that have been added to the request - NOT traces that have been
// successfully posted.
std::unique_ptr<std::vector<std::vector<TestSpanData>>> getTraces() {
Expand All @@ -351,6 +356,7 @@ struct MockHandle : public Handle {
std::map<std::string, std::string> headers;
std::string error = "";
std::string response = "";
int response_status = 200;
CURLcode rcode = CURLE_OK;
std::atomic<bool>* is_destructed = nullptr;
// Each time an perform is called, the next perform_result is used to determine if it
Expand Down

0 comments on commit b682d83

Please sign in to comment.