From cea0b297011ff956ec6e46e3f5b459755eceeb0e Mon Sep 17 00:00:00 2001
From: Karthik Reddy
Date: Tue, 23 Apr 2019 02:31:50 +0000
Subject: [PATCH 1/2] api: Add total_issued_requests to Upstream Locality and Endpoint Stats.

Description: This field tracks the count of requests issued since the last
report. This field will be used for global load balancing decisions.
Risk Level: Low
Testing: Compiles successfully.

Signed-off-by: Karthik Reddy
---
 api/envoy/api/v2/endpoint/load_report.proto | 32 +++++++--------------
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/api/envoy/api/v2/endpoint/load_report.proto b/api/envoy/api/v2/endpoint/load_report.proto
index e862074bd8eb..4fb137f97232 100644
--- a/api/envoy/api/v2/endpoint/load_report.proto
+++ b/api/envoy/api/v2/endpoint/load_report.proto
@@ -25,15 +25,6 @@ message UpstreamLocalityStats {
   // collected from. Zone and region names could be empty if unknown.
   core.Locality locality = 1;
 
-  // The total number of requests sent by this Envoy since the last report. This
-  // information is aggregated over all the upstream Endpoints. total_requests
-  // can be inferred from:
-  //
-  // .. code-block:: none
-  //
-  //   total_requests = total_successful_requests + total_requests_in_progress +
-  //     total_error_requests
-  //
   // The total number of requests successfully completed by the endpoints in the
   // locality.
   uint64 total_successful_requests = 2;
@@ -45,6 +36,11 @@ message UpstreamLocalityStats {
   // aggregated over all endpoints in the locality.
   uint64 total_error_requests = 4;
 
+  // The total number of requests that were issued by this Envoy since
+  // the last report. This information is aggregated over all the
+  // upstream endpoints in the locality.
+  uint64 total_issued_requests = 8;
+
   // Stats for multi-dimensional load balancing.
   repeated EndpointLoadMetricStats load_metric_stats = 5;
 
@@ -66,16 +62,6 @@ message UpstreamEndpointStats {
   // endpoint. Envoy will pass this directly to the management server.
   google.protobuf.Struct metadata = 6;
 
-  // The total number of requests successfully completed by the endpoint. A
-  // single HTTP or gRPC request or stream is counted as one request. A TCP
-  // connection is also treated as one request. There is no explicit
-  // total_requests field below for an endpoint, but it may be inferred from:
-  //
-  // .. code-block:: none
-  //
-  //   total_requests = total_successful_requests + total_requests_in_progress +
-  //     total_error_requests
-  //
   // The total number of requests successfully completed by the endpoints in the
   // locality. These include non-5xx responses for HTTP, where errors
   // originate at the client and the endpoint responded successfully. For gRPC,
@@ -97,6 +83,11 @@ message UpstreamEndpointStats {
   //   - DataLoss
   uint64 total_error_requests = 4;
 
+  // The total number of requests that were issued to this endpoint
+  // since the last report. A single TCP connection, HTTP or gRPC
+  // request or stream is counted as one request.
+  uint64 total_issued_requests = 7;
+
   // Stats for multi-dimensional load balancing.
   repeated EndpointLoadMetricStats load_metric_stats = 5;
 }
@@ -138,8 +129,7 @@ message ClusterStats {
   //
   // .. code-block:: none
   //
-  //   sum_locality(total_successful_requests) + sum_locality(total_requests_in_progress) +
-  //     sum_locality(total_error_requests) + total_dropped_requests`
+  //   sum_locality(total_issued_requests) + total_dropped_requests`
   //
   // The total number of dropped requests. This covers requests
   // deliberately dropped by the drop_overload policy and circuit breaking.

From 325808e7276580b587289db0699d4ed3337a8c1c Mon Sep 17 00:00:00 2001
From: Karthik Reddy
Date: Fri, 26 Apr 2019 21:57:45 +0000
Subject: [PATCH 2/2] LoadStatsReporter: Report total issued requests.

Description: Envoy maintains a stats counter rq_total to track total requests
made. By latching this counter when the load reporting period begins, we are
able to count the total requests issued in a load reporting interval. This
information is then reported to the management server via the field
total_issued_requests.
Risk Level: Low
Testing: //test/integration:load_stats_reporter passes.
Docs: Updated inline.
Release notes: api: track and report requests issued since last load report.

Signed-off-by: Karthik Reddy
---
 docs/root/intro/version_history.rst           |  1 +
 source/common/upstream/load_stats_reporter.cc |  4 +++
 .../load_stats_integration_test.cc            | 27 +++++++++++--------
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/docs/root/intro/version_history.rst b/docs/root/intro/version_history.rst
index a33f3b0fb161..3b7cafecd7ed 100644
--- a/docs/root/intro/version_history.rst
+++ b/docs/root/intro/version_history.rst
@@ -4,6 +4,7 @@ Version history
 1.11.0 (Pending)
 ================
 * access log: added a new field for response code details in :ref:`file access logger` and :ref:`gRPC access logger`.
+* api: track and report requests issued since last load report.
 * dubbo_proxy: support the :ref:`Dubbo proxy filter `.
 * eds: added support to specify max time for which endpoints can be used :ref:`gRPC filter `.
 * event: added :ref:`loop duration and poll delay statistics `.
diff --git a/source/common/upstream/load_stats_reporter.cc b/source/common/upstream/load_stats_reporter.cc
index 46b0b0d6336c..37ccc8caf1d6 100644
--- a/source/common/upstream/load_stats_reporter.cc
+++ b/source/common/upstream/load_stats_reporter.cc
@@ -63,10 +63,12 @@ void LoadStatsReporter::sendLoadStatsRequest() {
         uint64_t rq_success = 0;
         uint64_t rq_error = 0;
         uint64_t rq_active = 0;
+        uint64_t rq_issued = 0;
         for (auto host : hosts) {
           rq_success += host->stats().rq_success_.latch();
           rq_error += host->stats().rq_error_.latch();
           rq_active += host->stats().rq_active_.value();
+          rq_issued += host->stats().rq_total_.latch();
         }
         if (rq_success + rq_error + rq_active != 0) {
           auto* locality_stats = cluster_stats->add_upstream_locality_stats();
@@ -75,6 +77,7 @@ void LoadStatsReporter::sendLoadStatsRequest() {
           locality_stats->set_total_successful_requests(rq_success);
           locality_stats->set_total_error_requests(rq_error);
           locality_stats->set_total_requests_in_progress(rq_active);
+          locality_stats->set_total_issued_requests(rq_issued);
         }
       }
     }
@@ -154,6 +157,7 @@ void LoadStatsReporter::startLoadReportPeriod() {
       for (auto host : host_set->hosts()) {
         host->stats().rq_success_.latch();
         host->stats().rq_error_.latch();
+        host->stats().rq_total_.latch();
       }
     }
     cluster.info()->loadReportStats().upstream_rq_dropped_.latch();
diff --git a/test/integration/load_stats_integration_test.cc b/test/integration/load_stats_integration_test.cc
index a7ebcc237a35..5930e3dfe18d 100644
--- a/test/integration/load_stats_integration_test.cc
+++ b/test/integration/load_stats_integration_test.cc
@@ -294,7 +294,7 @@ class LoadStatsIntegrationTest : public testing::TestWithParam
       counter("load_reporter.requests")->value());
   // On slow machines, more than one load stats response may be pushed while we are simulating load.
@@ -381,7 +383,8 @@ TEST_P(LoadStatsIntegrationTest, Success) {
 
   // No locality for priority=1 since there's no "winter" endpoints.
   // The hosts for dragon were received because membership_total is accurate.
-  waitForLoadStatsRequest({localityStats("winter", 2, 0, 0), localityStats("dragon", 4, 0, 0)});
+  waitForLoadStatsRequest(
+      {localityStats("winter", 2, 0, 0, 2), localityStats("dragon", 4, 0, 0, 4)});
   EXPECT_EQ(2, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(3, test_server_->counter("load_reporter.responses")->value());
@@ -397,7 +400,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
   }
 
   waitForLoadStatsRequest(
-      {localityStats("winter", 2, 0, 0, 1), localityStats("dragon", 2, 0, 0, 1)});
+      {localityStats("winter", 2, 0, 0, 2, 1), localityStats("dragon", 2, 0, 0, 2, 1)});
   EXPECT_EQ(3, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(4, test_server_->counter("load_reporter.responses")->value());
   EXPECT_EQ(0, test_server_->counter("load_reporter.errors")->value());
@@ -411,7 +414,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
     sendAndReceiveUpstream(1);
   }
 
-  waitForLoadStatsRequest({localityStats("winter", 1, 0, 0)});
+  waitForLoadStatsRequest({localityStats("winter", 1, 0, 0, 1)});
   EXPECT_EQ(4, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(5, test_server_->counter("load_reporter.responses")->value());
   EXPECT_EQ(0, test_server_->counter("load_reporter.errors")->value());
@@ -424,7 +427,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
   sendAndReceiveUpstream(1);
   sendAndReceiveUpstream(1);
 
-  waitForLoadStatsRequest({localityStats("winter", 3, 0, 0)});
+  waitForLoadStatsRequest({localityStats("winter", 3, 0, 0, 3)});
   EXPECT_EQ(6, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(6, test_server_->counter("load_reporter.responses")->value());
@@ -438,7 +441,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
   sendAndReceiveUpstream(1);
   sendAndReceiveUpstream(1);
 
-  waitForLoadStatsRequest({localityStats("winter", 2, 0, 0)});
+  waitForLoadStatsRequest({localityStats("winter", 2, 0, 0, 2)});
   EXPECT_EQ(8, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(7, test_server_->counter("load_reporter.responses")->value());
@@ -473,7 +476,8 @@ TEST_P(LoadStatsIntegrationTest, LocalityWeighted) {
   sendAndReceiveUpstream(0);
 
   // Verify we get the expect request distribution.
-  waitForLoadStatsRequest({localityStats("winter", 4, 0, 0), localityStats("dragon", 2, 0, 0)});
+  waitForLoadStatsRequest(
+      {localityStats("winter", 4, 0, 0, 4), localityStats("dragon", 2, 0, 0, 2)});
   EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
 
   // On slow machines, more than one load stats response may be pushed while we are simulating load.
@@ -506,7 +510,8 @@ TEST_P(LoadStatsIntegrationTest, NoLocalLocality) {
   // order of locality stats is different to the Success case, where winter is
   // the local locality (and hence first in the list as per
   // HostsPerLocality::get()).
-  waitForLoadStatsRequest({localityStats("dragon", 2, 0, 0), localityStats("winter", 2, 0, 0)});
+  waitForLoadStatsRequest(
+      {localityStats("dragon", 2, 0, 0, 2), localityStats("winter", 2, 0, 0, 2)});
   EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
 
   // On slow machines, more than one load stats response may be pushed while we are simulating load.
@@ -533,7 +538,7 @@ TEST_P(LoadStatsIntegrationTest, Error) {
 
   // This should count as "success" since non-5xx.
   sendAndReceiveUpstream(0, 404);
 
-  waitForLoadStatsRequest({localityStats("winter", 1, 1, 0)});
+  waitForLoadStatsRequest({localityStats("winter", 1, 1, 0, 2)});
   EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
   EXPECT_LE(2, test_server_->counter("load_reporter.responses")->value());
@@ -553,7 +558,7 @@ TEST_P(LoadStatsIntegrationTest, InProgress) {
   requestLoadStatsResponse({"cluster_0"});
   initiateClientConnection();
-  waitForLoadStatsRequest({localityStats("winter", 0, 0, 1)});
+  waitForLoadStatsRequest({localityStats("winter", 0, 0, 1, 1)});
   waitForUpstreamResponse(0, 503);
   cleanupUpstreamAndDownstream();
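
The second patch leans on the latch semantics of Envoy's per-host request counters: value() returns the running total, while latch() returns only what has accumulated since the previous latch() call. The sketch below is a minimal, hypothetical illustration of that interval-counting pattern, not Envoy's actual Stats::Counter API; IntervalCounter, inc(), and the main() driver are invented for the example.

.. code-block:: cpp

   #include <cstdint>
   #include <iostream>

   // Hypothetical latch-style counter used only to illustrate the pattern.
   class IntervalCounter {
   public:
     void inc() {
       ++total_;
       ++pending_;
     }
     // Running total since construction (analogous to value()).
     uint64_t value() const { return total_; }
     // Count accumulated since the previous latch(); resets the interval.
     uint64_t latch() {
       const uint64_t interval = pending_;
       pending_ = 0;
       return interval;
     }

   private:
     uint64_t total_{0};
     uint64_t pending_{0};
   };

   int main() {
     IntervalCounter rq_total;
     rq_total.latch(); // startLoadReportPeriod(): discard pre-period counts.
     rq_total.inc();   // three requests issued during this reporting interval
     rq_total.inc();
     rq_total.inc();
     // sendLoadStatsRequest(): the latched delta becomes total_issued_requests.
     std::cout << "total_issued_requests: " << rq_total.latch() << "\n"; // 3
     std::cout << "lifetime total: " << rq_total.value() << "\n";        // 3
     return 0;
   }

Because the patch also latches rq_total in startLoadReportPeriod(), the first report after a load-stats stream is established only covers requests issued inside that reporting window, mirroring what the existing rq_success and rq_error counters already do.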