From cea0b297011ff956ec6e46e3f5b459755eceeb0e Mon Sep 17 00:00:00 2001
From: Karthik Reddy
Date: Tue, 23 Apr 2019 02:31:50 +0000
Subject: [PATCH 1/2] api: Add total_issued_requests to Upstream Locality and Endpoint Stats.

Description: This field tracks the count of requests issued since the last
report. This field will be used for global load balancing decisions.
Risk Level: Low
Testing: Compiles successfully.

Signed-off-by: Karthik Reddy
---
 api/envoy/api/v2/endpoint/load_report.proto | 32 +++++++--------------
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/api/envoy/api/v2/endpoint/load_report.proto b/api/envoy/api/v2/endpoint/load_report.proto
index e862074bd8eb..4fb137f97232 100644
--- a/api/envoy/api/v2/endpoint/load_report.proto
+++ b/api/envoy/api/v2/endpoint/load_report.proto
@@ -25,15 +25,6 @@ message UpstreamLocalityStats {
   // collected from. Zone and region names could be empty if unknown.
   core.Locality locality = 1;
 
-  // The total number of requests sent by this Envoy since the last report. This
-  // information is aggregated over all the upstream Endpoints. total_requests
-  // can be inferred from:
-  //
-  // .. code-block:: none
-  //
-  //   total_requests = total_successful_requests + total_requests_in_progress +
-  //     total_error_requests
-  //
   // The total number of requests successfully completed by the endpoints in the
   // locality.
   uint64 total_successful_requests = 2;
@@ -45,6 +36,11 @@ message UpstreamLocalityStats {
   // aggregated over all endpoints in the locality.
   uint64 total_error_requests = 4;
 
+  // The total number of requests that were issued by this Envoy since
+  // the last report. This information is aggregated over all the
+  // upstream endpoints in the locality.
+  uint64 total_issued_requests = 8;
+
   // Stats for multi-dimensional load balancing.
   repeated EndpointLoadMetricStats load_metric_stats = 5;
 
@@ -66,16 +62,6 @@ message UpstreamEndpointStats {
   // endpoint. Envoy will pass this directly to the management server.
   google.protobuf.Struct metadata = 6;
 
-  // The total number of requests successfully completed by the endpoint. A
-  // single HTTP or gRPC request or stream is counted as one request. A TCP
-  // connection is also treated as one request. There is no explicit
-  // total_requests field below for an endpoint, but it may be inferred from:
-  //
-  // .. code-block:: none
-  //
-  //   total_requests = total_successful_requests + total_requests_in_progress +
-  //     total_error_requests
-  //
   // The total number of requests successfully completed by the endpoints in the
   // locality. These include non-5xx responses for HTTP, where errors
   // originate at the client and the endpoint responded successfully. For gRPC,
@@ -97,6 +83,11 @@ message UpstreamEndpointStats {
   //   - DataLoss
   uint64 total_error_requests = 4;
 
+  // The total number of requests that were issued to this endpoint
+  // since the last report. A single TCP connection, HTTP or gRPC
+  // request or stream is counted as one request.
+  uint64 total_issued_requests = 7;
+
   // Stats for multi-dimensional load balancing.
   repeated EndpointLoadMetricStats load_metric_stats = 5;
 }
@@ -138,8 +129,7 @@ message ClusterStats {
   //
   // .. code-block:: none
   //
-  //   sum_locality(total_successful_requests) + sum_locality(total_requests_in_progress) +
-  //     sum_locality(total_error_requests) + total_dropped_requests`
+  //   sum_locality(total_issued_requests) + total_dropped_requests`
   //
   // The total number of dropped requests. This covers requests
   // deliberately dropped by the drop_overload policy and circuit breaking.

From 325808e7276580b587289db0699d4ed3337a8c1c Mon Sep 17 00:00:00 2001
From: Karthik Reddy
Date: Fri, 26 Apr 2019 21:57:45 +0000
Subject: [PATCH 2/2] LoadStatsReporter: Report total issued requests.

Description: Envoy maintains a stats counter rq_total to track total requests
made. By latching this counter when the load reporting period begins, we are
able to count the total requests issued in a load reporting interval. This
information is then reported to the management server via the field
total_issued_requests.
Risk Level: Low
Testing: //test/integration:load_stats_reporter passes.
Docs: Updated inline.
Release notes: api: track and report requests issued since last load report.

Signed-off-by: Karthik Reddy
---
 docs/root/intro/version_history.rst           |  1 +
 source/common/upstream/load_stats_reporter.cc |  4 +++
 .../load_stats_integration_test.cc            | 27 +++++++++++--------
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/docs/root/intro/version_history.rst b/docs/root/intro/version_history.rst
index a33f3b0fb161..3b7cafecd7ed 100644
--- a/docs/root/intro/version_history.rst
+++ b/docs/root/intro/version_history.rst
@@ -4,6 +4,7 @@ Version history
 1.11.0 (Pending)
 ================
 * access log: added a new field for response code details in :ref:`file access logger` and :ref:`gRPC access logger`.
+* api: track and report requests issued since last load report.
 * dubbo_proxy: support the :ref:`Dubbo proxy filter `.
 * eds: added support to specify max time for which endpoints can be used :ref:`gRPC filter `.
 * event: added :ref:`loop duration and poll delay statistics `.
diff --git a/source/common/upstream/load_stats_reporter.cc b/source/common/upstream/load_stats_reporter.cc
index 46b0b0d6336c..37ccc8caf1d6 100644
--- a/source/common/upstream/load_stats_reporter.cc
+++ b/source/common/upstream/load_stats_reporter.cc
@@ -63,10 +63,12 @@ void LoadStatsReporter::sendLoadStatsRequest() {
         uint64_t rq_success = 0;
         uint64_t rq_error = 0;
         uint64_t rq_active = 0;
+        uint64_t rq_issued = 0;
         for (auto host : hosts) {
           rq_success += host->stats().rq_success_.latch();
           rq_error += host->stats().rq_error_.latch();
           rq_active += host->stats().rq_active_.value();
+          rq_issued += host->stats().rq_total_.latch();
         }
         if (rq_success + rq_error + rq_active != 0) {
           auto* locality_stats = cluster_stats->add_upstream_locality_stats();
@@ -75,6 +77,7 @@ void LoadStatsReporter::sendLoadStatsRequest() {
           locality_stats->set_total_successful_requests(rq_success);
           locality_stats->set_total_error_requests(rq_error);
           locality_stats->set_total_requests_in_progress(rq_active);
+          locality_stats->set_total_issued_requests(rq_issued);
         }
       }
     }
@@ -154,6 +157,7 @@ void LoadStatsReporter::startLoadReportPeriod() {
       for (auto host : host_set->hosts()) {
         host->stats().rq_success_.latch();
         host->stats().rq_error_.latch();
+        host->stats().rq_total_.latch();
       }
     }
     cluster.info()->loadReportStats().upstream_rq_dropped_.latch();
diff --git a/test/integration/load_stats_integration_test.cc b/test/integration/load_stats_integration_test.cc
index a7ebcc237a35..5930e3dfe18d 100644
--- a/test/integration/load_stats_integration_test.cc
+++ b/test/integration/load_stats_integration_test.cc
@@ -294,7 +294,7 @@ class LoadStatsIntegrationTest : public testing::TestWithParam
       counter("load_reporter.requests")->value());
   // On slow machines, more than one load stats response may be pushed while we are simulating load.
@@ -381,7 +383,8 @@ TEST_P(LoadStatsIntegrationTest, Success) {
 
   // No locality for priority=1 since there's no "winter" endpoints.
   // The hosts for dragon were received because membership_total is accurate.
-  waitForLoadStatsRequest({localityStats("winter", 2, 0, 0), localityStats("dragon", 4, 0, 0)});
+  waitForLoadStatsRequest(
+      {localityStats("winter", 2, 0, 0, 2), localityStats("dragon", 4, 0, 0, 4)});
   EXPECT_EQ(2, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(3, test_server_->counter("load_reporter.responses")->value());
@@ -397,7 +400,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
   }
 
   waitForLoadStatsRequest(
-      {localityStats("winter", 2, 0, 0, 1), localityStats("dragon", 2, 0, 0, 1)});
+      {localityStats("winter", 2, 0, 0, 2, 1), localityStats("dragon", 2, 0, 0, 2, 1)});
   EXPECT_EQ(3, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(4, test_server_->counter("load_reporter.responses")->value());
   EXPECT_EQ(0, test_server_->counter("load_reporter.errors")->value());
@@ -411,7 +414,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
     sendAndReceiveUpstream(1);
   }
 
-  waitForLoadStatsRequest({localityStats("winter", 1, 0, 0)});
+  waitForLoadStatsRequest({localityStats("winter", 1, 0, 0, 1)});
   EXPECT_EQ(4, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(5, test_server_->counter("load_reporter.responses")->value());
   EXPECT_EQ(0, test_server_->counter("load_reporter.errors")->value());
@@ -424,7 +427,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
   sendAndReceiveUpstream(1);
   sendAndReceiveUpstream(1);
 
-  waitForLoadStatsRequest({localityStats("winter", 3, 0, 0)});
+  waitForLoadStatsRequest({localityStats("winter", 3, 0, 0, 3)});
   EXPECT_EQ(6, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(6, test_server_->counter("load_reporter.responses")->value());
@@ -438,7 +441,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
   sendAndReceiveUpstream(1);
   sendAndReceiveUpstream(1);
 
-  waitForLoadStatsRequest({localityStats("winter", 2, 0, 0)});
+  waitForLoadStatsRequest({localityStats("winter", 2, 0, 0, 2)});
   EXPECT_EQ(8, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(7, test_server_->counter("load_reporter.responses")->value());
@@ -473,7 +476,8 @@ TEST_P(LoadStatsIntegrationTest, LocalityWeighted) {
   sendAndReceiveUpstream(0);
 
   // Verify we get the expect request distribution.
-  waitForLoadStatsRequest({localityStats("winter", 4, 0, 0), localityStats("dragon", 2, 0, 0)});
+  waitForLoadStatsRequest(
+      {localityStats("winter", 4, 0, 0, 4), localityStats("dragon", 2, 0, 0, 2)});
   EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
 
   // On slow machines, more than one load stats response may be pushed while we are simulating load.
@@ -506,7 +510,8 @@ TEST_P(LoadStatsIntegrationTest, NoLocalLocality) {
   // order of locality stats is different to the Success case, where winter is
   // the local locality (and hence first in the list as per
   // HostsPerLocality::get()).
-  waitForLoadStatsRequest({localityStats("dragon", 2, 0, 0), localityStats("winter", 2, 0, 0)});
+  waitForLoadStatsRequest(
+      {localityStats("dragon", 2, 0, 0, 2), localityStats("winter", 2, 0, 0, 2)});
   EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
 
   // On slow machines, more than one load stats response may be pushed while we are simulating load.
@@ -533,7 +538,7 @@ TEST_P(LoadStatsIntegrationTest, Error) {
 
   // This should count as "success" since non-5xx.
   sendAndReceiveUpstream(0, 404);
 
-  waitForLoadStatsRequest({localityStats("winter", 1, 1, 0)});
+  waitForLoadStatsRequest({localityStats("winter", 1, 1, 0, 2)});
   EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(2, test_server_->counter("load_reporter.responses")->value());
@@ -553,7 +558,7 @@ TEST_P(LoadStatsIntegrationTest, InProgress) {
   requestLoadStatsResponse({"cluster_0"});
   initiateClientConnection();
-  waitForLoadStatsRequest({localityStats("winter", 0, 0, 1)});
+  waitForLoadStatsRequest({localityStats("winter", 0, 0, 1, 1)});
   waitForUpstreamResponse(0, 503);
   cleanupUpstreamAndDownstream();
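
The second patch leans on the latch semantics of Envoy's per-host request counters: value() returns the running total, while latch() returns only what has accumulated since the previous latch() call. The sketch below is a minimal, hypothetical illustration of that interval-counting pattern, not Envoy's actual Stats::Counter API; IntervalCounter, inc(), and the main() driver are invented for the example.

.. code-block:: cpp

   #include <cstdint>
   #include <iostream>

   // Hypothetical latch-style counter used only to illustrate the pattern.
   class IntervalCounter {
   public:
     void inc() {
       ++total_;
       ++pending_;
     }
     // Running total since construction (analogous to value()).
     uint64_t value() const { return total_; }
     // Count accumulated since the previous latch(); resets the interval.
     uint64_t latch() {
       const uint64_t interval = pending_;
       pending_ = 0;
       return interval;
     }

   private:
     uint64_t total_{0};
     uint64_t pending_{0};
   };

   int main() {
     IntervalCounter rq_total;
     rq_total.latch(); // startLoadReportPeriod(): discard pre-period counts.
     rq_total.inc();   // three requests issued during this reporting interval
     rq_total.inc();
     rq_total.inc();
     // sendLoadStatsRequest(): the latched delta becomes total_issued_requests.
     std::cout << "total_issued_requests: " << rq_total.latch() << "\n"; // 3
     std::cout << "lifetime total: " << rq_total.value() << "\n";        // 3
     return 0;
   }

Because the patch also latches rq_total in startLoadReportPeriod(), the first report after a load-stats stream is established only covers requests issued inside that reporting window, mirroring what the existing rq_success and rq_error counters already do.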