From b682d83f178953e0fd52263d688ddc8b85b92b88 Mon Sep 17 00:00:00 2001 From: David Goffredo Date: Fri, 2 Jun 2023 12:02:29 -0400 Subject: [PATCH] log an error when the agent responds with a non-success HTTP status (#260) Currently, the AgentWriter passes all response bodies to the AgentHttpEncoder. The AgentHttpEncoder then interprets the body as a JSON object. However, when the HTTP response status does not indicate success, the agent does not return a JSON response body. This revision logs an error whenever a non-success HTTP response status is delivered by the agent. --- scripts/format.sh | 2 +- src/agent_writer.cpp | 31 +++++++++++++++++++++++++-- src/transport.cpp | 8 +++++++ src/transport.h | 5 +++++ test/agent_writer_test.cpp | 44 ++++++++++++++++++++++++++++++++++++++ test/mocks.h | 6 ++++++ 6 files changed, 93 insertions(+), 3 deletions(-) diff --git a/scripts/format.sh b/scripts/format.sh index d7f30707..e8f3d77b 100755 --- a/scripts/format.sh +++ b/scripts/format.sh @@ -5,7 +5,7 @@ set -e usage() { argv0="$0" - + cat < handle) { payload = trace_encoder_->payload(); trace_encoder_->clearTraces(); } // lock on mutex_ ends. - // Send spans, not in critical period. + // Send the spans. Note that this is not in the critical section above. bool success = retryFiniteOnFail( [&]() { return AgentWriter::postTraces(handle, headers, payload, logger_); }); + // Sending could fail. If it succeeds, then the HTTP response status + // could indicate an error or success. Also, an empty response body + // indicates an error even when the status is 200. if (success) { - trace_encoder_->handleResponse(handle->getResponse()); + const int response_status = handle->getResponseStatus(); + const std::string body = handle->getResponse(); + if (response_status == 0) { + std::ostringstream diagnostic; + diagnostic << "Datadog Agent returned response without an HTTP status and with the " + "following body of length " + << body.size() << ": " << body; + logger_->Log(LogLevel::error, diagnostic.str()); + } else if (response_status != 200) { + std::ostringstream diagnostic; + diagnostic << "Datadog Agent returned response with unexpected HTTP status " + << response_status << " and the following body of length " << body.size() + << ": " << body; + logger_->Log(LogLevel::error, diagnostic.str()); + } else if (body.empty()) { + logger_->Log(LogLevel::error, + "Datadog Agent returned response without a body. This tracer might be " + "sending batches of traces too frequently."); + } else { + // success + trace_encoder_->handleResponse(handle->getResponse()); + } } + // If `success == false`, then `postTraces` will have already logged + // an error. + // Let thread calling 'flush' know that we're done flushing. { std::unique_lock lock(mutex_); diff --git a/src/transport.cpp b/src/transport.cpp index d890fd4e..d4fac6b2 100644 --- a/src/transport.cpp +++ b/src/transport.cpp @@ -97,6 +97,14 @@ CURLcode CurlHandle::perform() { std::string CurlHandle::getError() { return std::string(curl_error_buffer_); } std::string CurlHandle::getResponse() { return response_buffer_.str(); } +int CurlHandle::getResponseStatus() { + long raw = 0; + // As of November 2003 (libcurl version 7.10.8), fetching the response code + // will not return an error. + // If there is no response code to return, then zero will be set. + (void)curl_easy_getinfo(handle_, CURLINFO_RESPONSE_CODE, &raw); + return static_cast(raw); +} } // namespace opentracing } // namespace datadog diff --git a/src/transport.h b/src/transport.h index a96e51e5..298f081b 100644 --- a/src/transport.h +++ b/src/transport.h @@ -24,6 +24,10 @@ class Handle { virtual CURLcode perform() = 0; virtual std::string getError() = 0; virtual std::string getResponse() = 0; + // Return the HTTP status of the response received, or return zero if no + // response was received or if there is no HTTP status associated with the + // response. + virtual int getResponseStatus() = 0; }; // A Handle that uses real curl to really send things. Not thread-safe. @@ -39,6 +43,7 @@ class CurlHandle : public Handle { CURLcode perform() override; std::string getError() override; std::string getResponse() override; + int getResponseStatus() override; private: // For things that need cleaning up if the constructor fails as well as on destruction. diff --git a/test/agent_writer_test.cpp b/test/agent_writer_test.cpp index 77b3533c..6baa6261 100644 --- a/test/agent_writer_test.cpp +++ b/test/agent_writer_test.cpp @@ -184,6 +184,50 @@ TEST_CASE("writer") { REQUIRE(sampler->config == ""); } + SECTION("handle error responses") { + using Catch::Matchers::Contains; + + // HTTP status zero indicates "no status." + handle->response_status = 0; + writer.write(make_trace( + {TestSpanData{"web", "service", "resource", "service.name", 1, 1, 0, 69, 420, 0}})); + + writer.flush(std::chrono::seconds(10)); + REQUIRE(logger->records.size() != 0); + // The logged error diagnostic will say that there was no response status. + REQUIRE_THAT(logger->records.back().message, Contains("response without an HTTP status")); + + // HTTP status 200 with an empty body means that the response really should + // be 429 "too many requests," but the Agent is not configured to return + // that status and instead uses 200. + handle->response_status = 200; + writer.write(make_trace( + {TestSpanData{"web", "service", "resource", "service.name", 1, 1, 0, 69, 420, 0}})); + + writer.flush(std::chrono::seconds(10)); + REQUIRE(logger->records.size() != 0); + // The logged error diagnostic will mention the lack of response. + REQUIRE_THAT(logger->records.back().message, Contains("response without a body")); + + // HTTP statuses other than 200 are unexpected. + std::vector statuses; + for (int i = 100; i < 200; ++i) { + statuses.push_back(i); + } + for (int i = 201; i < 600; ++i) { + statuses.push_back(i); + } + auto status = GENERATE_COPY(from_range(statuses)); + handle->response_status = status; + writer.write(make_trace( + {TestSpanData{"web", "service", "resource", "service.name", 1, 1, 0, 69, 420, 0}})); + + writer.flush(std::chrono::seconds(10)); + REQUIRE(logger->records.size() != 0); + // The logged error diagnostic will contain the response status. + REQUIRE_THAT(logger->records.back().message, Contains(" " + std::to_string(status) + " ")); + } + SECTION("queue does not grow indefinitely") { for (uint64_t i = 0; i < 30; i++) { // Only 25 actually get written. writer.write(make_trace( diff --git a/test/mocks.h b/test/mocks.h index ba3d5537..e7966b20 100644 --- a/test/mocks.h +++ b/test/mocks.h @@ -331,6 +331,11 @@ struct MockHandle : public Handle { return response; } + int getResponseStatus() override { + std::unique_lock lock(mutex); + return response_status; + } + // Note, this returns any traces that have been added to the request - NOT traces that have been // successfully posted. std::unique_ptr>> getTraces() { @@ -351,6 +356,7 @@ struct MockHandle : public Handle { std::map headers; std::string error = ""; std::string response = ""; + int response_status = 200; CURLcode rcode = CURLE_OK; std::atomic* is_destructed = nullptr; // Each time an perform is called, the next perform_result is used to determine if it