Skip to content

Commit

Permalink
api: Add total_issued_requests to Upstream Locality and Endpoint Stat…
Browse files Browse the repository at this point in the history
…s. (envoyproxy#6692)

total_issued_requests field tracks the count of requests issued since
the last report. This field will be used for global load balancing
decisions.

Envoy maintains a stats counter rq_total to track total
requests made. By latching this counter when load reporting period
begins, we are able to count the total requests issued in a load
reporting interval. This information is then used to populate the field
total_issued_requests.

Risk Level: Low
Testing: //test/integration:load_stats_reporter passes.

Signed-off-by: Karthik Reddy <rekarthik@google.com>

Signed-off-by: Jeff Piazza <jeffpiazza@google.com>
  • Loading branch information
karthikbox authored and jeffpiazza-google committed May 3, 2019
1 parent dbbd2cb commit 9e84724
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 32 deletions.
32 changes: 11 additions & 21 deletions api/envoy/api/v2/endpoint/load_report.proto
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,6 @@ message UpstreamLocalityStats {
// collected from. Zone and region names could be empty if unknown.
core.Locality locality = 1;

// The total number of requests sent by this Envoy since the last report. This
// information is aggregated over all the upstream Endpoints. total_requests
// can be inferred from:
//
// .. code-block:: none
//
// total_requests = total_successful_requests + total_requests_in_progress +
// total_error_requests
//
// The total number of requests successfully completed by the endpoints in the
// locality.
uint64 total_successful_requests = 2;
Expand All @@ -45,6 +36,11 @@ message UpstreamLocalityStats {
// aggregated over all endpoints in the locality.
uint64 total_error_requests = 4;

// The total number of requests that were issued by this Envoy since
// the last report. This information is aggregated over all the
// upstream endpoints in the locality.
uint64 total_issued_requests = 8;

// Stats for multi-dimensional load balancing.
repeated EndpointLoadMetricStats load_metric_stats = 5;

Expand All @@ -66,16 +62,6 @@ message UpstreamEndpointStats {
// endpoint. Envoy will pass this directly to the management server.
google.protobuf.Struct metadata = 6;

// The total number of requests successfully completed by the endpoint. A
// single HTTP or gRPC request or stream is counted as one request. A TCP
// connection is also treated as one request. There is no explicit
// total_requests field below for an endpoint, but it may be inferred from:
//
// .. code-block:: none
//
// total_requests = total_successful_requests + total_requests_in_progress +
// total_error_requests
//
// The total number of requests successfully completed by the endpoints in the
// locality. These include non-5xx responses for HTTP, where errors
// originate at the client and the endpoint responded successfully. For gRPC,
Expand All @@ -97,6 +83,11 @@ message UpstreamEndpointStats {
// - DataLoss
uint64 total_error_requests = 4;

// The total number of requests that were issued to this endpoint
// since the last report. A single TCP connection, HTTP or gRPC
// request or stream is counted as one request.
uint64 total_issued_requests = 7;

// Stats for multi-dimensional load balancing.
repeated EndpointLoadMetricStats load_metric_stats = 5;
}
Expand Down Expand Up @@ -138,8 +129,7 @@ message ClusterStats {
//
// .. code-block:: none
//
// sum_locality(total_successful_requests) + sum_locality(total_requests_in_progress) +
// sum_locality(total_error_requests) + total_dropped_requests`
// sum_locality(total_issued_requests) + total_dropped_requests`
//
// The total number of dropped requests. This covers requests
// deliberately dropped by the drop_overload policy and circuit breaking.
Expand Down
1 change: 1 addition & 0 deletions docs/root/intro/version_history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Version history
1.11.0 (Pending)
================
* access log: added a new field for response code details in :ref:`file access logger<config_access_log_format_response_code_details>` and :ref:`gRPC access logger<envoy_api_field_data.accesslog.v2.HTTPResponseProperties.response_code_details>`.
* api: track and report requests issued since last load report.
* dubbo_proxy: support the :ref:`Dubbo proxy filter <config_network_filters_dubbo_proxy>`.
* eds: added support to specify max time for which endpoints can be used :ref:`gRPC filter <envoy_api_msg_ClusterLoadAssignment.Policy>`.
* event: added :ref:`loop duration and poll delay statistics <operations_performance>`.
Expand Down
4 changes: 4 additions & 0 deletions source/common/upstream/load_stats_reporter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,12 @@ void LoadStatsReporter::sendLoadStatsRequest() {
uint64_t rq_success = 0;
uint64_t rq_error = 0;
uint64_t rq_active = 0;
uint64_t rq_issued = 0;
for (auto host : hosts) {
rq_success += host->stats().rq_success_.latch();
rq_error += host->stats().rq_error_.latch();
rq_active += host->stats().rq_active_.value();
rq_issued += host->stats().rq_total_.latch();
}
if (rq_success + rq_error + rq_active != 0) {
auto* locality_stats = cluster_stats->add_upstream_locality_stats();
Expand All @@ -75,6 +77,7 @@ void LoadStatsReporter::sendLoadStatsRequest() {
locality_stats->set_total_successful_requests(rq_success);
locality_stats->set_total_error_requests(rq_error);
locality_stats->set_total_requests_in_progress(rq_active);
locality_stats->set_total_issued_requests(rq_issued);
}
}
}
Expand Down Expand Up @@ -154,6 +157,7 @@ void LoadStatsReporter::startLoadReportPeriod() {
for (auto host : host_set->hosts()) {
host->stats().rq_success_.latch();
host->stats().rq_error_.latch();
host->stats().rq_total_.latch();
}
}
cluster.info()->loadReportStats().upstream_rq_dropped_.latch();
Expand Down
27 changes: 16 additions & 11 deletions test/integration/load_stats_integration_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ class LoadStatsIntegrationTest : public testing::TestWithParam<Network::Address:

envoy::api::v2::endpoint::UpstreamLocalityStats localityStats(const std::string& sub_zone,
uint64_t success, uint64_t error,
uint64_t active,
uint64_t active, uint64_t issued,
uint32_t priority = 0) {
envoy::api::v2::endpoint::UpstreamLocalityStats locality_stats;
auto* locality = locality_stats.mutable_locality();
Expand All @@ -304,6 +304,7 @@ class LoadStatsIntegrationTest : public testing::TestWithParam<Network::Address:
locality_stats.set_total_successful_requests(success);
locality_stats.set_total_error_requests(error);
locality_stats.set_total_requests_in_progress(active);
locality_stats.set_total_issued_requests(issued);
locality_stats.set_priority(priority);
return locality_stats;
}
Expand Down Expand Up @@ -364,7 +365,8 @@ TEST_P(LoadStatsIntegrationTest, Success) {
}

// Verify we do not get empty stats for non-zero priorities.
waitForLoadStatsRequest({localityStats("winter", 2, 0, 0), localityStats("dragon", 2, 0, 0)});
waitForLoadStatsRequest(
{localityStats("winter", 2, 0, 0, 2), localityStats("dragon", 2, 0, 0, 2)});

EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
// On slow machines, more than one load stats response may be pushed while we are simulating load.
Expand All @@ -381,7 +383,8 @@ TEST_P(LoadStatsIntegrationTest, Success) {

// No locality for priority=1 since there's no "winter" endpoints.
// The hosts for dragon were received because membership_total is accurate.
waitForLoadStatsRequest({localityStats("winter", 2, 0, 0), localityStats("dragon", 4, 0, 0)});
waitForLoadStatsRequest(
{localityStats("winter", 2, 0, 0, 2), localityStats("dragon", 4, 0, 0, 4)});

EXPECT_EQ(2, test_server_->counter("load_reporter.requests")->value());
EXPECT_LE(3, test_server_->counter("load_reporter.responses")->value());
Expand All @@ -397,7 +400,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
}

waitForLoadStatsRequest(
{localityStats("winter", 2, 0, 0, 1), localityStats("dragon", 2, 0, 0, 1)});
{localityStats("winter", 2, 0, 0, 2, 1), localityStats("dragon", 2, 0, 0, 2, 1)});
EXPECT_EQ(3, test_server_->counter("load_reporter.requests")->value());
EXPECT_LE(4, test_server_->counter("load_reporter.responses")->value());
EXPECT_EQ(0, test_server_->counter("load_reporter.errors")->value());
Expand All @@ -411,7 +414,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
sendAndReceiveUpstream(1);
}

waitForLoadStatsRequest({localityStats("winter", 1, 0, 0)});
waitForLoadStatsRequest({localityStats("winter", 1, 0, 0, 1)});
EXPECT_EQ(4, test_server_->counter("load_reporter.requests")->value());
EXPECT_LE(5, test_server_->counter("load_reporter.responses")->value());
EXPECT_EQ(0, test_server_->counter("load_reporter.errors")->value());
Expand All @@ -424,7 +427,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
sendAndReceiveUpstream(1);
sendAndReceiveUpstream(1);

waitForLoadStatsRequest({localityStats("winter", 3, 0, 0)});
waitForLoadStatsRequest({localityStats("winter", 3, 0, 0, 3)});

EXPECT_EQ(6, test_server_->counter("load_reporter.requests")->value());
EXPECT_LE(6, test_server_->counter("load_reporter.responses")->value());
Expand All @@ -438,7 +441,7 @@ TEST_P(LoadStatsIntegrationTest, Success) {
sendAndReceiveUpstream(1);
sendAndReceiveUpstream(1);

waitForLoadStatsRequest({localityStats("winter", 2, 0, 0)});
waitForLoadStatsRequest({localityStats("winter", 2, 0, 0, 2)});

EXPECT_EQ(8, test_server_->counter("load_reporter.requests")->value());
EXPECT_LE(7, test_server_->counter("load_reporter.responses")->value());
Expand Down Expand Up @@ -473,7 +476,8 @@ TEST_P(LoadStatsIntegrationTest, LocalityWeighted) {
sendAndReceiveUpstream(0);

// Verify we get the expect request distribution.
waitForLoadStatsRequest({localityStats("winter", 4, 0, 0), localityStats("dragon", 2, 0, 0)});
waitForLoadStatsRequest(
{localityStats("winter", 4, 0, 0, 4), localityStats("dragon", 2, 0, 0, 2)});

EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
// On slow machines, more than one load stats response may be pushed while we are simulating load.
Expand Down Expand Up @@ -506,7 +510,8 @@ TEST_P(LoadStatsIntegrationTest, NoLocalLocality) {
// order of locality stats is different to the Success case, where winter is
// the local locality (and hence first in the list as per
// HostsPerLocality::get()).
waitForLoadStatsRequest({localityStats("dragon", 2, 0, 0), localityStats("winter", 2, 0, 0)});
waitForLoadStatsRequest(
{localityStats("dragon", 2, 0, 0, 2), localityStats("winter", 2, 0, 0, 2)});

EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
// On slow machines, more than one load stats response may be pushed while we are simulating load.
Expand All @@ -533,7 +538,7 @@ TEST_P(LoadStatsIntegrationTest, Error) {
// This should count as "success" since non-5xx.
sendAndReceiveUpstream(0, 404);

waitForLoadStatsRequest({localityStats("winter", 1, 1, 0)});
waitForLoadStatsRequest({localityStats("winter", 1, 1, 0, 2)});

EXPECT_EQ(1, test_server_->counter("load_reporter.requests")->value());
EXPECT_LE(2, test_server_->counter("load_reporter.responses")->value());
Expand All @@ -553,7 +558,7 @@ TEST_P(LoadStatsIntegrationTest, InProgress) {

requestLoadStatsResponse({"cluster_0"});
initiateClientConnection();
waitForLoadStatsRequest({localityStats("winter", 0, 0, 1)});
waitForLoadStatsRequest({localityStats("winter", 0, 0, 1, 1)});

waitForUpstreamResponse(0, 503);
cleanupUpstreamAndDownstream();
Expand Down

0 comments on commit 9e84724

Please sign in to comment.