diff --git a/api/envoy/api/v2/endpoint/load_report.proto b/api/envoy/api/v2/endpoint/load_report.proto index e862074bd8eb..4fb137f97232 100644 --- a/api/envoy/api/v2/endpoint/load_report.proto +++ b/api/envoy/api/v2/endpoint/load_report.proto @@ -25,15 +25,6 @@ message UpstreamLocalityStats { // collected from. Zone and region names could be empty if unknown. core.Locality locality = 1; - // The total number of requests sent by this Envoy since the last report. This - // information is aggregated over all the upstream Endpoints. total_requests - // can be inferred from: - // - // .. code-block:: none - // - // total_requests = total_successful_requests + total_requests_in_progress + - // total_error_requests - // // The total number of requests successfully completed by the endpoints in the // locality. uint64 total_successful_requests = 2; @@ -45,6 +36,11 @@ message UpstreamLocalityStats { // aggregated over all endpoints in the locality. uint64 total_error_requests = 4; + // The total number of requests that were issued by this Envoy since + // the last report. This information is aggregated over all the + // upstream endpoints in the locality. + uint64 total_issued_requests = 8; + // Stats for multi-dimensional load balancing. repeated EndpointLoadMetricStats load_metric_stats = 5; @@ -66,16 +62,6 @@ message UpstreamEndpointStats { // endpoint. Envoy will pass this directly to the management server. google.protobuf.Struct metadata = 6; - // The total number of requests successfully completed by the endpoint. A - // single HTTP or gRPC request or stream is counted as one request. A TCP - // connection is also treated as one request. There is no explicit - // total_requests field below for an endpoint, but it may be inferred from: - // - // .. code-block:: none - // - // total_requests = total_successful_requests + total_requests_in_progress + - // total_error_requests - // // The total number of requests successfully completed by the endpoints in the // locality. 
These include non-5xx responses for HTTP, where errors // originate at the client and the endpoint responded successfully. For gRPC, @@ -97,6 +83,11 @@ message UpstreamEndpointStats { // - DataLoss uint64 total_error_requests = 4; + // The total number of requests that were issued to this endpoint + // since the last report. A single TCP connection, HTTP or gRPC + // request or stream is counted as one request. + uint64 total_issued_requests = 7; + // Stats for multi-dimensional load balancing. repeated EndpointLoadMetricStats load_metric_stats = 5; } @@ -138,8 +129,7 @@ message ClusterStats { // // .. code-block:: none // - // sum_locality(total_successful_requests) + sum_locality(total_requests_in_progress) + - // sum_locality(total_error_requests) + total_dropped_requests` + // sum_locality(total_issued_requests) + total_dropped_requests // // The total number of dropped requests. This covers requests // deliberately dropped by the drop_overload policy and circuit breaking. diff --git a/docs/root/intro/version_history.rst b/docs/root/intro/version_history.rst index 0c86cbd4d113..c876eaac69d3 100644 --- a/docs/root/intro/version_history.rst +++ b/docs/root/intro/version_history.rst @@ -4,6 +4,7 @@ Version history 1.11.0 (Pending) ================ * access log: added a new field for response code details in :ref:`file access logger` and :ref:`gRPC access logger`. +* api: track and report requests issued since last load report. * dubbo_proxy: support the :ref:`Dubbo proxy filter `. * eds: added support to specify max time for which endpoints can be used :ref:`gRPC filter `. * event: added :ref:`loop duration and poll delay statistics `. 
diff --git a/source/common/upstream/load_stats_reporter.cc b/source/common/upstream/load_stats_reporter.cc index 46b0b0d6336c..37ccc8caf1d6 100644 --- a/source/common/upstream/load_stats_reporter.cc +++ b/source/common/upstream/load_stats_reporter.cc @@ -63,10 +63,12 @@ void LoadStatsReporter::sendLoadStatsRequest() { uint64_t rq_success = 0; uint64_t rq_error = 0; uint64_t rq_active = 0; + uint64_t rq_issued = 0; for (auto host : hosts) { rq_success += host->stats().rq_success_.latch(); rq_error += host->stats().rq_error_.latch(); rq_active += host->stats().rq_active_.value(); + rq_issued += host->stats().rq_total_.latch(); } if (rq_success + rq_error + rq_active != 0) { auto* locality_stats = cluster_stats->add_upstream_locality_stats(); @@ -75,6 +77,7 @@ void LoadStatsReporter::sendLoadStatsRequest() { locality_stats->set_total_successful_requests(rq_success); locality_stats->set_total_error_requests(rq_error); locality_stats->set_total_requests_in_progress(rq_active); + locality_stats->set_total_issued_requests(rq_issued); } } } @@ -154,6 +157,7 @@ void LoadStatsReporter::startLoadReportPeriod() { for (auto host : host_set->hosts()) { host->stats().rq_success_.latch(); host->stats().rq_error_.latch(); + host->stats().rq_total_.latch(); } } cluster.info()->loadReportStats().upstream_rq_dropped_.latch(); diff --git a/test/integration/load_stats_integration_test.cc b/test/integration/load_stats_integration_test.cc index a7ebcc237a35..5930e3dfe18d 100644 --- a/test/integration/load_stats_integration_test.cc +++ b/test/integration/load_stats_integration_test.cc @@ -294,7 +294,7 @@ class LoadStatsIntegrationTest : public testing::TestWithParamcounter("load_reporter.requests")->value()); // On slow machines, more than one load stats response may be pushed while we are simulating load. @@ -381,7 +383,8 @@ TEST_P(LoadStatsIntegrationTest, Success) { // No locality for priority=1 since there's no "winter" endpoints. 
// The hosts for dragon were received because membership_total is accurate. - waitForLoadStatsRequest({localityStats("winter", 2, 0, 0), localityStats("dragon", 4, 0, 0)}); + waitForLoadStatsRequest( + {localityStats("winter", 2, 0, 0, 2), localityStats("dragon", 4, 0, 0, 4)}); EXPECT_EQ(2, test_server_->counter("load_reporter.requests")->value()); EXPECT_LE(3, test_server_->counter("load_reporter.responses")->value()); @@ -397,7 +400,7 @@ TEST_P(LoadStatsIntegrationTest, Success) { } waitForLoadStatsRequest( - {localityStats("winter", 2, 0, 0, 1), localityStats("dragon", 2, 0, 0, 1)}); + {localityStats("winter", 2, 0, 0, 2, 1), localityStats("dragon", 2, 0, 0, 2, 1)}); EXPECT_EQ(3, test_server_->counter("load_reporter.requests")->value()); EXPECT_LE(4, test_server_->counter("load_reporter.responses")->value()); EXPECT_EQ(0, test_server_->counter("load_reporter.errors")->value()); @@ -411,7 +414,7 @@ TEST_P(LoadStatsIntegrationTest, Success) { sendAndReceiveUpstream(1); } - waitForLoadStatsRequest({localityStats("winter", 1, 0, 0)}); + waitForLoadStatsRequest({localityStats("winter", 1, 0, 0, 1)}); EXPECT_EQ(4, test_server_->counter("load_reporter.requests")->value()); EXPECT_LE(5, test_server_->counter("load_reporter.responses")->value()); EXPECT_EQ(0, test_server_->counter("load_reporter.errors")->value()); @@ -424,7 +427,7 @@ TEST_P(LoadStatsIntegrationTest, Success) { sendAndReceiveUpstream(1); sendAndReceiveUpstream(1); - waitForLoadStatsRequest({localityStats("winter", 3, 0, 0)}); + waitForLoadStatsRequest({localityStats("winter", 3, 0, 0, 3)}); EXPECT_EQ(6, test_server_->counter("load_reporter.requests")->value()); EXPECT_LE(6, test_server_->counter("load_reporter.responses")->value()); @@ -438,7 +441,7 @@ TEST_P(LoadStatsIntegrationTest, Success) { sendAndReceiveUpstream(1); sendAndReceiveUpstream(1); - waitForLoadStatsRequest({localityStats("winter", 2, 0, 0)}); + waitForLoadStatsRequest({localityStats("winter", 2, 0, 0, 2)}); EXPECT_EQ(8, 
test_server_->counter("load_reporter.requests")->value()); EXPECT_LE(7, test_server_->counter("load_reporter.responses")->value()); @@ -473,7 +476,8 @@ TEST_P(LoadStatsIntegrationTest, LocalityWeighted) { sendAndReceiveUpstream(0); // Verify we get the expect request distribution. - waitForLoadStatsRequest({localityStats("winter", 4, 0, 0), localityStats("dragon", 2, 0, 0)}); + waitForLoadStatsRequest( + {localityStats("winter", 4, 0, 0, 4), localityStats("dragon", 2, 0, 0, 2)}); EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value()); // On slow machines, more than one load stats response may be pushed while we are simulating load. @@ -506,7 +510,8 @@ TEST_P(LoadStatsIntegrationTest, NoLocalLocality) { // order of locality stats is different to the Success case, where winter is // the local locality (and hence first in the list as per // HostsPerLocality::get()). - waitForLoadStatsRequest({localityStats("dragon", 2, 0, 0), localityStats("winter", 2, 0, 0)}); + waitForLoadStatsRequest( + {localityStats("dragon", 2, 0, 0, 2), localityStats("winter", 2, 0, 0, 2)}); EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value()); // On slow machines, more than one load stats response may be pushed while we are simulating load. @@ -533,7 +538,7 @@ TEST_P(LoadStatsIntegrationTest, Error) { // This should count as "success" since non-5xx. 
sendAndReceiveUpstream(0, 404); - waitForLoadStatsRequest({localityStats("winter", 1, 1, 0)}); + waitForLoadStatsRequest({localityStats("winter", 1, 1, 0, 2)}); EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value()); EXPECT_LE(2, test_server_->counter("load_reporter.responses")->value()); @@ -553,7 +558,7 @@ TEST_P(LoadStatsIntegrationTest, InProgress) { requestLoadStatsResponse({"cluster_0"}); initiateClientConnection(); - waitForLoadStatsRequest({localityStats("winter", 0, 0, 1)}); + waitForLoadStatsRequest({localityStats("winter", 0, 0, 1, 1)}); waitForUpstreamResponse(0, 503); cleanupUpstreamAndDownstream();