api: Add total_issued_requests to Upstream Locality and Endpoint Stat…

…s. (envoyproxy#6692) total_issued_requests field tracks the count of requests issued since the last report. This field will be used for global load balancing decisions. Envoy maintains a stats counter rq_total to track total requests made. By latching this counter when load reporting period begins, we are able to count the total requests issued in a load reporting interval. This information is then used to populate the field total_issued_requests. Risk Level: Low Testing: //test/integration:load_stats_reporter passes. Signed-off-by: Karthik Reddy <rekarthik@google.com> Signed-off-by: Jeff Piazza <jeffpiazza@google.com>
jeffpiazza-google · May 3, 2019 · 9e84724 · 9e84724
1 parent dbbd2cb
commit 9e84724
Show file tree

Hide file tree

Showing 4 changed files with 32 additions and 32 deletions.
diff --git a/api/envoy/api/v2/endpoint/load_report.proto b/api/envoy/api/v2/endpoint/load_report.proto
@@ -25,15 +25,6 @@ message UpstreamLocalityStats {
   // collected from. Zone and region names could be empty if unknown.
   core.Locality locality = 1;
 
-  // The total number of requests sent by this Envoy since the last report. This
-  // information is aggregated over all the upstream Endpoints. total_requests
-  // can be inferred from:
-  //
-  // .. code-block:: none
-  //
-  //   total_requests = total_successful_requests + total_requests_in_progress +
-  //     total_error_requests
-  //
   // The total number of requests successfully completed by the endpoints in the
   // locality.
   uint64 total_successful_requests = 2;
@@ -45,6 +36,11 @@ message UpstreamLocalityStats {
   // aggregated over all endpoints in the locality.
   uint64 total_error_requests = 4;
 
+  // The total number of requests that were issued by this Envoy since
+  // the last report. This information is aggregated over all the
+  // upstream endpoints in the locality.
+  uint64 total_issued_requests = 8;
+
   // Stats for multi-dimensional load balancing.
   repeated EndpointLoadMetricStats load_metric_stats = 5;
 
@@ -66,16 +62,6 @@ message UpstreamEndpointStats {
   // endpoint. Envoy will pass this directly to the management server.
   google.protobuf.Struct metadata = 6;
 
-  // The total number of requests successfully completed by the endpoint. A
-  // single HTTP or gRPC request or stream is counted as one request. A TCP
-  // connection is also treated as one request. There is no explicit
-  // total_requests field below for an endpoint, but it may be inferred from:
-  //
-  // .. code-block:: none
-  //
-  //   total_requests = total_successful_requests + total_requests_in_progress +
-  //     total_error_requests
-  //
   // The total number of requests successfully completed by the endpoints in the
   // locality. These include non-5xx responses for HTTP, where errors
   // originate at the client and the endpoint responded successfully. For gRPC,
@@ -97,6 +83,11 @@ message UpstreamEndpointStats {
   //   - DataLoss
   uint64 total_error_requests = 4;
 
+  // The total number of requests that were issued to this endpoint
+  // since the last report. A single TCP connection, HTTP or gRPC
+  // request or stream is counted as one request.
+  uint64 total_issued_requests = 7;
+
   // Stats for multi-dimensional load balancing.
   repeated EndpointLoadMetricStats load_metric_stats = 5;
 }
@@ -138,8 +129,7 @@ message ClusterStats {
   //
   // .. code-block:: none
   //
-  //   sum_locality(total_successful_requests) + sum_locality(total_requests_in_progress) +
-  //     sum_locality(total_error_requests) + total_dropped_requests`
+  //   sum_locality(total_issued_requests) + total_dropped_requests`
   //
   // The total number of dropped requests. This covers requests
   // deliberately dropped by the drop_overload policy and circuit breaking.

diff --git a/docs/root/intro/version_history.rst b/docs/root/intro/version_history.rst
@@ -4,6 +4,7 @@ Version history
 1.11.0 (Pending)
 ================
 * access log: added a new field for response code details in :ref:`file access logger<config_access_log_format_response_code_details>` and :ref:`gRPC access logger<envoy_api_field_data.accesslog.v2.HTTPResponseProperties.response_code_details>`.
+* api: track and report requests issued since last load report.
 * dubbo_proxy: support the :ref:`Dubbo proxy filter <config_network_filters_dubbo_proxy>`.
 * eds: added support to specify max time for which endpoints can be used :ref:`gRPC filter <envoy_api_msg_ClusterLoadAssignment.Policy>`.
 * event: added :ref:`loop duration and poll delay statistics <operations_performance>`.

diff --git a/source/common/upstream/load_stats_reporter.cc b/source/common/upstream/load_stats_reporter.cc
@@ -63,10 +63,12 @@ void LoadStatsReporter::sendLoadStatsRequest() {
         uint64_t rq_success = 0;
         uint64_t rq_error = 0;
         uint64_t rq_active = 0;
+        uint64_t rq_issued = 0;
         for (auto host : hosts) {
           rq_success += host->stats().rq_success_.latch();
           rq_error += host->stats().rq_error_.latch();
           rq_active += host->stats().rq_active_.value();
+          rq_issued += host->stats().rq_total_.latch();
         }
         if (rq_success + rq_error + rq_active != 0) {
           auto* locality_stats = cluster_stats->add_upstream_locality_stats();
@@ -75,6 +77,7 @@ void LoadStatsReporter::sendLoadStatsRequest() {
           locality_stats->set_total_successful_requests(rq_success);
           locality_stats->set_total_error_requests(rq_error);
           locality_stats->set_total_requests_in_progress(rq_active);
+          locality_stats->set_total_issued_requests(rq_issued);
         }
       }
     }
@@ -154,6 +157,7 @@ void LoadStatsReporter::startLoadReportPeriod() {
       for (auto host : host_set->hosts()) {
         host->stats().rq_success_.latch();
         host->stats().rq_error_.latch();
+        host->stats().rq_total_.latch();
       }
     }
     cluster.info()->loadReportStats().upstream_rq_dropped_.latch();

diff --git a/test/integration/load_stats_integration_test.cc b/test/integration/load_stats_integration_test.cc
@@ -294,7 +294,7 @@ class LoadStatsIntegrationTest : public testing::TestWithParam<Network::Address:
 
   envoy::api::v2::endpoint::UpstreamLocalityStats localityStats(const std::string& sub_zone,
                                                                 uint64_t success, uint64_t error,
-                                                                uint64_t active,
+                                                                uint64_t active, uint64_t issued,
                                                                 uint32_t priority = 0) {
     envoy::api::v2::endpoint::UpstreamLocalityStats locality_stats;
     auto* locality = locality_stats.mutable_locality();
@@ -304,6 +304,7 @@ class LoadStatsIntegrationTest : public testing::TestWithParam<Network::Address:
     locality_stats.set_total_successful_requests(success);
     locality_stats.set_total_error_requests(error);
     locality_stats.set_total_requests_in_progress(active);
+    locality_stats.set_total_issued_requests(issued);
     locality_stats.set_priority(priority);
     return locality_stats;
   }
@@ -364,7 +365,8 @@ TEST_P(LoadStatsIntegrationTest, Success) {
   }
 
   // Verify we do not get empty stats for non-zero priorities.
-  waitForLoadStatsRequest({localityStats("winter", 2, 0, 0), localityStats("dragon", 2, 0, 0)});
+  waitForLoadStatsRequest(
+      {localityStats("winter", 2, 0, 0, 2), localityStats("dragon", 2, 0, 0, 2)});
 
   EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
   // On slow machines, more than one load stats response may be pushed while we are simulating load.
@@ -381,7 +383,8 @@ TEST_P(LoadStatsIntegrationTest, Success) {
 
   // No locality for priority=1 since there's no "winter" endpoints.
   // The hosts for dragon were received because membership_total is accurate.
-  waitForLoadStatsRequest({localityStats("winter", 2, 0, 0), localityStats("dragon", 4, 0, 0)});
+  waitForLoadStatsRequest(
+      {localityStats("winter", 2, 0, 0, 2), localityStats("dragon", 4, 0, 0, 4)});
 
   EXPECT_EQ(2, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(3, test_server_->counter("load_reporter.responses")->value());
@@ -397,7 +400,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
   }
 
   waitForLoadStatsRequest(
-      {localityStats("winter", 2, 0, 0, 1), localityStats("dragon", 2, 0, 0, 1)});
+      {localityStats("winter", 2, 0, 0, 2, 1), localityStats("dragon", 2, 0, 0, 2, 1)});
   EXPECT_EQ(3, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(4, test_server_->counter("load_reporter.responses")->value());
   EXPECT_EQ(0, test_server_->counter("load_reporter.errors")->value());
@@ -411,7 +414,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
     sendAndReceiveUpstream(1);
   }
 
-  waitForLoadStatsRequest({localityStats("winter", 1, 0, 0)});
+  waitForLoadStatsRequest({localityStats("winter", 1, 0, 0, 1)});
   EXPECT_EQ(4, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(5, test_server_->counter("load_reporter.responses")->value());
   EXPECT_EQ(0, test_server_->counter("load_reporter.errors")->value());
@@ -424,7 +427,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
   sendAndReceiveUpstream(1);
   sendAndReceiveUpstream(1);
 
-  waitForLoadStatsRequest({localityStats("winter", 3, 0, 0)});
+  waitForLoadStatsRequest({localityStats("winter", 3, 0, 0, 3)});
 
   EXPECT_EQ(6, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(6, test_server_->counter("load_reporter.responses")->value());
@@ -438,7 +441,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
   sendAndReceiveUpstream(1);
   sendAndReceiveUpstream(1);
 
-  waitForLoadStatsRequest({localityStats("winter", 2, 0, 0)});
+  waitForLoadStatsRequest({localityStats("winter", 2, 0, 0, 2)});
 
   EXPECT_EQ(8, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(7, test_server_->counter("load_reporter.responses")->value());
@@ -473,7 +476,8 @@ TEST_P(LoadStatsIntegrationTest, LocalityWeighted) {
   sendAndReceiveUpstream(0);
 
   // Verify we get the expect request distribution.
-  waitForLoadStatsRequest({localityStats("winter", 4, 0, 0), localityStats("dragon", 2, 0, 0)});
+  waitForLoadStatsRequest(
+      {localityStats("winter", 4, 0, 0, 4), localityStats("dragon", 2, 0, 0, 2)});
 
   EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
   // On slow machines, more than one load stats response may be pushed while we are simulating load.
@@ -506,7 +510,8 @@ TEST_P(LoadStatsIntegrationTest, NoLocalLocality) {
   // order of locality stats is different to the Success case, where winter is
   // the local locality (and hence first in the list as per
   // HostsPerLocality::get()).
-  waitForLoadStatsRequest({localityStats("dragon", 2, 0, 0), localityStats("winter", 2, 0, 0)});
+  waitForLoadStatsRequest(
+      {localityStats("dragon", 2, 0, 0, 2), localityStats("winter", 2, 0, 0, 2)});
 
   EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
   // On slow machines, more than one load stats response may be pushed while we are simulating load.
@@ -533,7 +538,7 @@ TEST_P(LoadStatsIntegrationTest, Error) {
   // This should count as "success" since non-5xx.
   sendAndReceiveUpstream(0, 404);
 
-  waitForLoadStatsRequest({localityStats("winter", 1, 1, 0)});
+  waitForLoadStatsRequest({localityStats("winter", 1, 1, 0, 2)});
 
   EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(2, test_server_->counter("load_reporter.responses")->value());
@@ -553,7 +558,7 @@ TEST_P(LoadStatsIntegrationTest, InProgress) {
 
   requestLoadStatsResponse({"cluster_0"});
   initiateClientConnection();
-  waitForLoadStatsRequest({localityStats("winter", 0, 0, 1)});
+  waitForLoadStatsRequest({localityStats("winter", 0, 0, 1, 1)});
 
   waitForUpstreamResponse(0, 503);
   cleanupUpstreamAndDownstream();