From db76822bc11de0e155e7d6f49b2c17db61c075a5 Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Mon, 24 Oct 2016 20:48:32 -0700 Subject: [PATCH 01/13] initial change to balancing logic. --- include/envoy/upstream/upstream.h | 4 +- source/common/upstream/load_balancer_impl.cc | 115 +++++++++++---- source/common/upstream/load_balancer_impl.h | 19 ++- source/common/upstream/upstream_impl.h | 2 +- .../upstream/load_balancer_impl_test.cc | 137 ++++++++++++------ .../upstream/load_balancer_simulation_test.cc | 71 ++++++--- 6 files changed, 249 insertions(+), 99 deletions(-) diff --git a/include/envoy/upstream/upstream.h b/include/envoy/upstream/upstream.h index 33251a0f0c18..3ce5f093fcf7 100644 --- a/include/envoy/upstream/upstream.h +++ b/include/envoy/upstream/upstream.h @@ -195,7 +195,9 @@ class HostSet { COUNTER(zone_over_percentage) \ COUNTER(zone_routing_sampled) \ COUNTER(zone_routing_no_sampled) \ - GAUGE (max_host_weight) + GAUGE (max_host_weight) \ + COUNTER(local_cluster_not_ok) \ + COUNTER(zone_number_differs) // clang-format on /** diff --git a/source/common/upstream/load_balancer_impl.cc b/source/common/upstream/load_balancer_impl.cc index b27125b425c3..9d32939ef30b 100644 --- a/source/common/upstream/load_balancer_impl.cc +++ b/source/common/upstream/load_balancer_impl.cc @@ -29,13 +29,26 @@ bool LoadBalancerBase::earlyExitNonZoneRouting() { return true; } + // If local cluster is not set, or we are in panic mode for it. + if (local_host_set_ == nullptr || local_host_set_->hosts().empty() || + isGlobalPanic(*local_host_set_)) { + stats_.local_cluster_not_ok_.inc(); + return true; + } + + // Same number of zones should be for local and upstream cluster. + if (host_set_.healthyHostsPerZone().size() != local_host_set_->healthyHostsPerZone().size()) { + stats_.zone_number_differs_.inc(); + return true; + } + return false; } -bool LoadBalancerBase::isGlobalPanic() { +bool LoadBalancerBase::isGlobalPanic(const HostSet& host_set) { uint64_t global_panic_threshold = std::min(100UL, runtime_.snapshot().getInteger("upstream.healthy_panic_threshold", 50)); - double healthy_percent = 100.0 * host_set_.healthyHosts().size() / host_set_.hosts().size(); + double healthy_percent = 100.0 * host_set.healthyHosts().size() / host_set.hosts().size(); // If the % of healthy hosts in the cluster is less than our panic threshold, we use all hosts. if (healthy_percent < global_panic_threshold) { @@ -46,48 +59,92 @@ bool LoadBalancerBase::isGlobalPanic() { return false; } -const std::vector& LoadBalancerBase::hostsToUse() { - ASSERT(host_set_.healthyHosts().size() <= host_set_.hosts().size()); +std::vector +LoadBalancerBase::calculateZonePercentage(const std::vector>& hosts_per_zone) { + std::vector percentage(hosts_per_zone.size()); - if (host_set_.hosts().empty() || isGlobalPanic()) { - return host_set_.hosts(); + uint64_t total_hosts = 0; + for (const auto& zone_hosts : hosts_per_zone) { + total_hosts += zone_hosts.size(); } - if (earlyExitNonZoneRouting()) { - return host_set_.healthyHosts(); + if (total_hosts != 0) { + size_t pos = 0; + for (const auto& zone_hosts : hosts_per_zone) { + percentage[pos++] = 10000ULL * zone_hosts.size() / total_hosts; + } } + return percentage; +} + +const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { // At this point it's guaranteed to be at least 2 zones. - uint32_t number_of_zones = host_set_.healthyHostsPerZone().size(); - ASSERT(number_of_zones >= 2U); - const std::vector& local_zone_healthy_hosts = host_set_.healthyHostsPerZone()[0]; + ASSERT(host_set_.healthyHostsPerZone().size() >= 2U); - // If number of hosts in a local zone big enough then route all requests to the same zone. - if (local_zone_healthy_hosts.size() * number_of_zones >= host_set_.healthyHosts().size()) { + std::vector local_percentage = + calculateZonePercentage(local_host_set_->healthyHostsPerZone()); + std::vector upstream_percentage = + calculateZonePercentage(host_set_.healthyHostsPerZone()); + + // Try to push all of the requests to the same zone first. + // If we have lower percent of hosts in the local cluster in the same zone, + // we can push all of the requests directly to upstream cluster in the same zone. + if (upstream_percentage[0] >= local_percentage[0]) { stats_.zone_over_percentage_.inc(); - return local_zone_healthy_hosts; + return host_set_.healthyHostsPerZone()[0]; + } + + // If we cannot route all requests to the same zone, calculate what percentage can be routed. + // For example, if local percentage is 20% and upstream is 10% + // we can route only 50% of requests directly. + uint64_t local_percent_route = upstream_percentage[0] * 10000 / local_percentage[0]; + if (random_.random() % 10000 < local_percent_route) { + stats_.zone_routing_sampled_.inc(); + return host_set_.healthyHostsPerZone()[0]; } - // If local zone ratio is lower than expected we should only partially route requests from the - // same zone. - double zone_host_ratio = 1.0 * local_zone_healthy_hosts.size() / host_set_.healthyHosts().size(); - double ratio_to_route = zone_host_ratio * number_of_zones; + // At this point we should route cross zone as we cannot route to the local zone. + stats_.zone_routing_no_sampled_.inc(); + + std::vector capacity_left; + // Local zone does not have additional capacity (we already routed what we could), but + // put it to the capacity_left so that index in the array matches to the zone index. + capacity_left.push_back(0); + for (size_t i = 1; i < local_percentage.size(); ++i) { + // Only route to the zones that have additional capacity. + if (upstream_percentage[i] > local_percentage[i]) { + capacity_left.push_back(capacity_left[i - 1] + upstream_percentage[i] - local_percentage[i]); + } else { + capacity_left.push_back(capacity_left[i - 1]); + } + } - // Not zone routed requests will be distributed between all hosts and hence - // we need to route only fraction of req_percent_to_route to the local zone. - double actual_routing_ratio = (ratio_to_route - zone_host_ratio) / (1 - zone_host_ratio); + // Select specific zone for cross zone traffic based on the additional capacity in zones. + uint64_t threshold = random_.random() % capacity_left.back(); - // Scale actual_routing_ratio to improve precision. - const uint64_t scale_factor = 10000; - uint64_t zone_routing_threshold = scale_factor * actual_routing_ratio; + // This potentially can be optimized to be O(log(N)) where N is the number of zones. + // Linear scan should be faster for smaller N, in most of the scenarios N will be small. + int pos = 0; + while (threshold > capacity_left[pos]) { + pos++; + } - if (random_.random() % 10000 < zone_routing_threshold) { - stats_.zone_routing_sampled_.inc(); - return local_zone_healthy_hosts; - } else { - stats_.zone_routing_no_sampled_.inc(); + return host_set_.healthyHostsPerZone()[pos]; +} + +const std::vector& LoadBalancerBase::hostsToUse() { + ASSERT(host_set_.healthyHosts().size() <= host_set_.hosts().size()); + + if (host_set_.hosts().empty() || isGlobalPanic(host_set_)) { + return host_set_.hosts(); + } + + if (earlyExitNonZoneRouting()) { return host_set_.healthyHosts(); } + + return tryChooseLocalZoneHosts(); } ConstHostPtr RoundRobinLoadBalancer::chooseHost() { diff --git a/source/common/upstream/load_balancer_impl.h b/source/common/upstream/load_balancer_impl.h index b2bc09987641..42d3eea63823 100644 --- a/source/common/upstream/load_balancer_impl.h +++ b/source/common/upstream/load_balancer_impl.h @@ -26,8 +26,25 @@ class LoadBalancerBase { Runtime::RandomGenerator& random_; private: + /* + * @return decision on quick exit from zone aware host selection. + */ bool earlyExitNonZoneRouting(); - bool isGlobalPanic(); + + /** + * For the given host_set it @return if we should be in a panic mode or not. + * For example, if majority of hosts are unhealthy we'll be likely in a panic mode. + * In this case we'll route requests to hosts no matter if they are healthy or not. + */ + bool isGlobalPanic(const HostSet& host_set); + const std::vector& tryChooseLocalZoneHosts(); + + /** + * @return ratio of hosts in a given zone to total number of hosts. The result is scaled by 10000 + * multiplier. + */ + std::vector + calculateZonePercentage(const std::vector>& hosts_per_zone); const HostSet& host_set_; const HostSet* local_host_set_; diff --git a/source/common/upstream/upstream_impl.h b/source/common/upstream/upstream_impl.h index 8b69af6fd89f..3272ed059966 100644 --- a/source/common/upstream/upstream_impl.h +++ b/source/common/upstream/upstream_impl.h @@ -105,7 +105,7 @@ typedef std::shared_ptr>> HostListsPtr; typedef std::shared_ptr>> ConstHostListsPtr; /** - * Base clase for all clusters as well as thread local host sets. + * Base class for all clusters as well as thread local host sets. */ class HostSetImpl : public virtual HostSet { public: diff --git a/test/common/upstream/load_balancer_impl_test.cc b/test/common/upstream/load_balancer_impl_test.cc index 36576a32e91e..1016f40f9176 100644 --- a/test/common/upstream/load_balancer_impl_test.cc +++ b/test/common/upstream/load_balancer_impl_test.cc @@ -18,30 +18,48 @@ class RoundRobinLoadBalancerTest : public testing::Test { public: RoundRobinLoadBalancerTest() : stats_(ClusterImplBase::generateStats("", stats_store_)) {} + void init(bool need_local_cluster) { + if (need_local_cluster) { + local_cluster_hosts_.reset(new HostSetImpl()); + lb_.reset(new RoundRobinLoadBalancer(cluster_, local_cluster_hosts_.get(), stats_, runtime_, + random_)); + } else { + lb_.reset(new RoundRobinLoadBalancer(cluster_, nullptr, stats_, runtime_, random_)); + } + } + NiceMock cluster_; NiceMock runtime_; NiceMock random_; Stats::IsolatedStoreImpl stats_store_; ClusterStats stats_; - RoundRobinLoadBalancer lb_{cluster_, nullptr, stats_, runtime_, random_}; + std::shared_ptr local_cluster_hosts_; + std::shared_ptr lb_; + std::vector empty_host_vector_; }; -TEST_F(RoundRobinLoadBalancerTest, NoHosts) { EXPECT_EQ(nullptr, lb_.chooseHost()); } +TEST_F(RoundRobinLoadBalancerTest, NoHosts) { + init(false); + EXPECT_EQ(nullptr, lb_->chooseHost()); +} TEST_F(RoundRobinLoadBalancerTest, SingleHost) { + init(false); cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}; cluster_.hosts_ = cluster_.healthy_hosts_; - EXPECT_EQ(cluster_.healthy_hosts_[0], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); } TEST_F(RoundRobinLoadBalancerTest, Normal) { + init(false); cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}; cluster_.hosts_ = cluster_.healthy_hosts_; - EXPECT_EQ(cluster_.healthy_hosts_[0], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); } TEST_F(RoundRobinLoadBalancerTest, MaxUnhealthyPanic) { + init(false); cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}; cluster_.hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), @@ -51,9 +69,9 @@ TEST_F(RoundRobinLoadBalancerTest, MaxUnhealthyPanic) { newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:84"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:85")}; - EXPECT_EQ(cluster_.hosts_[0], lb_.chooseHost()); - EXPECT_EQ(cluster_.hosts_[1], lb_.chooseHost()); - EXPECT_EQ(cluster_.hosts_[2], lb_.chooseHost()); + EXPECT_EQ(cluster_.hosts_[0], lb_->chooseHost()); + EXPECT_EQ(cluster_.hosts_[1], lb_->chooseHost()); + EXPECT_EQ(cluster_.hosts_[2], lb_->chooseHost()); // Take the threshold back above the panic threshold. cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), @@ -61,13 +79,15 @@ TEST_F(RoundRobinLoadBalancerTest, MaxUnhealthyPanic) { newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:83")}; - EXPECT_EQ(cluster_.healthy_hosts_[3], lb_.chooseHost()); - EXPECT_EQ(cluster_.healthy_hosts_[0], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[3], lb_->chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); EXPECT_EQ(3UL, stats_.upstream_rq_lb_healthy_panic_.value()); } TEST_F(RoundRobinLoadBalancerTest, ZoneAwareSmallCluster) { + init(false); + cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}; @@ -83,22 +103,29 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareSmallCluster) { EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.min_cluster_size", 6)) .WillRepeatedly(Return(6)); - EXPECT_EQ(cluster_.healthy_hosts_[0], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); EXPECT_EQ(1U, stats_.zone_cluster_too_small_.value()); - EXPECT_EQ(cluster_.healthy_hosts_[1], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[1], lb_->chooseHost()); EXPECT_EQ(2U, stats_.zone_cluster_too_small_.value()); } TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZone) { - cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}; - cluster_.hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}; - cluster_.healthy_hosts_per_zone_ = {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, - {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}, - {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}}; + init(true); + + HostVectorPtr hosts( + new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")})); + HostListsPtr hosts_per_zone(new std::vector>( + {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}})); + + cluster_.healthy_hosts_ = *hosts; + cluster_.hosts_ = *hosts; + cluster_.healthy_hosts_per_zone_ = *hosts_per_zone; + local_cluster_hosts_->updateHosts(hosts, hosts, hosts_per_zone, hosts_per_zone, + empty_host_vector_, empty_host_vector_); EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) .WillRepeatedly(Return(50)); @@ -109,33 +136,48 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZone) { .WillRepeatedly(Return(3)); // There is only one host in the given zone for zone aware routing. - EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); EXPECT_EQ(1U, stats_.zone_over_percentage_.value()); - EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); EXPECT_EQ(2U, stats_.zone_over_percentage_.value()); // Disable runtime global zone routing. EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) .WillRepeatedly(Return(false)); - EXPECT_EQ(cluster_.healthy_hosts_[2], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[2], lb_->chooseHost()); } TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingSmallZone) { - cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:83"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:84")}; - cluster_.hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:83"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:84")}; - cluster_.healthy_hosts_per_zone_ = {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, - {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}, - {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:83"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:84")}}; + init(true); + + HostVectorPtr upstream_hosts( + new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:83"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:84")})); + HostVectorPtr local_hosts( + new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:0"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:1"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:2")})); + + HostListsPtr upstream_hosts_per_zone(new std::vector>( + {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:83"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:84")}})); + + HostListsPtr local_hosts_per_zone(new std::vector>( + {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:0")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:1")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:2")}})); + + cluster_.healthy_hosts_ = *upstream_hosts; + cluster_.hosts_ = *upstream_hosts; + cluster_.healthy_hosts_per_zone_ = *upstream_hosts_per_zone; + local_cluster_hosts_->updateHosts(local_hosts, local_hosts, local_hosts_per_zone, + local_hosts_per_zone, empty_host_vector_, empty_host_vector_); EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) .WillRepeatedly(Return(50)); @@ -146,15 +188,18 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingSmallZone) { .WillRepeatedly(Return(5)); // There is only one host in the given zone for zone aware routing. - EXPECT_CALL(random_, random()).WillOnce(Return(1000)); - EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_.chooseHost()); + EXPECT_CALL(random_, random()).WillOnce(Return(100)); + EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); EXPECT_EQ(1U, stats_.zone_routing_sampled_.value()); - EXPECT_CALL(random_, random()).WillOnce(Return(6500)); - EXPECT_EQ(cluster_.healthy_hosts_[1], lb_.chooseHost()); + // Force request out of small zone. + EXPECT_CALL(random_, random()).WillOnce(Return(9999)).WillOnce(Return(2)); + EXPECT_EQ(cluster_.healthy_hosts_per_zone_[1][1], lb_->chooseHost()); EXPECT_EQ(1U, stats_.zone_routing_no_sampled_.value()); } TEST_F(RoundRobinLoadBalancerTest, NoZoneAwareRoutingOneZone) { + init(true); + cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}; cluster_.hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}; cluster_.healthy_hosts_per_zone_ = {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}}; @@ -163,10 +208,12 @@ TEST_F(RoundRobinLoadBalancerTest, NoZoneAwareRoutingOneZone) { EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) .WillRepeatedly(Return(50)); - EXPECT_EQ(cluster_.healthy_hosts_[0], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); } TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingNotHealthy) { + init(true); + cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}; @@ -181,8 +228,8 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingNotHealthy) { .WillRepeatedly(Return(50)); // local zone has no healthy hosts, take from the all healthy hosts. - EXPECT_EQ(cluster_.healthy_hosts_[0], lb_.chooseHost()); - EXPECT_EQ(cluster_.healthy_hosts_[1], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[1], lb_->chooseHost()); } class LeastRequestLoadBalancerTest : public testing::Test { diff --git a/test/common/upstream/load_balancer_simulation_test.cc b/test/common/upstream/load_balancer_simulation_test.cc index bb04b638cc92..a6d88b940b1c 100644 --- a/test/common/upstream/load_balancer_simulation_test.cc +++ b/test/common/upstream/load_balancer_simulation_test.cc @@ -39,32 +39,51 @@ class DISABLED_SimulationTest : public testing::Test { */ void run(std::vector originating_cluster, std::vector all_destination_cluster, std::vector healthy_destination_cluster) { + local_host_set_ = new HostSetImpl(); + // TODO: make load balancer per originating cluster host. + RandomLoadBalancer lb(cluster_, local_host_set_, stats_, runtime_, random_); - std::vector> per_zone_hosts = - generateHostsPerZone(healthy_destination_cluster); + HostListsPtr upstream_per_zone_hosts = generateHostsPerZone(healthy_destination_cluster); + HostListsPtr local_per_zone_hosts = generateHostsPerZone(originating_cluster); - std::vector originating_hosts = generateHostList(originating_cluster); - cluster_.healthy_hosts_ = generateHostList(healthy_destination_cluster); - cluster_.hosts_ = generateHostList(all_destination_cluster); + HostVectorPtr originating_hosts = generateHostList(originating_cluster); + HostVectorPtr healthy_destination = generateHostList(healthy_destination_cluster); + cluster_.healthy_hosts_ = *healthy_destination; + HostVectorPtr all_destination = generateHostList(all_destination_cluster); + cluster_.hosts_ = *all_destination; std::map hits; for (uint32_t i = 0; i < total_number_of_requests; ++i) { - HostPtr from_host = selectOriginatingHost(originating_hosts); + HostPtr from_host = selectOriginatingHost(*originating_hosts); uint32_t from_zone = atoi(from_host->zone().c_str()); - std::vector> per_zone_upstream; - per_zone_upstream.push_back(per_zone_hosts[from_zone]); - for (size_t pos = 0; pos < per_zone_hosts.size(); ++pos) { - if (pos == from_zone) { + // Populate host set for upstream cluster. + HostListsPtr per_zone_upstream(new std::vector>()); + per_zone_upstream->push_back((*upstream_per_zone_hosts)[from_zone]); + for (size_t zone = 0; zone < upstream_per_zone_hosts->size(); ++zone) { + if (zone == from_zone) { continue; } - per_zone_upstream.push_back(per_zone_hosts[pos]); + per_zone_upstream->push_back((*upstream_per_zone_hosts)[zone]); } + cluster_.hosts_per_zone_ = *per_zone_upstream; + cluster_.healthy_hosts_per_zone_ = *per_zone_upstream; + + // Populate host set for originating cluster. + HostListsPtr per_zone_local(new std::vector>()); + per_zone_local->push_back((*local_per_zone_hosts)[from_zone]); + for (size_t zone = 0; zone < local_per_zone_hosts->size(); ++zone) { + if (zone == from_zone) { + continue; + } - cluster_.healthy_hosts_per_zone_ = std::move(per_zone_upstream); + per_zone_local->push_back((*local_per_zone_hosts)[zone]); + } + local_host_set_->updateHosts(originating_hosts, originating_hosts, per_zone_local, + per_zone_local, empty_vector_, empty_vector_); - ConstHostPtr selected = lb_.chooseHost(); + ConstHostPtr selected = lb.chooseHost(); hits[selected->url()]++; } @@ -83,13 +102,13 @@ class DISABLED_SimulationTest : public testing::Test { * Generate list of hosts based on number of hosts in the given zone. * @param hosts number of hosts per zone. */ - std::vector generateHostList(const std::vector& hosts) { - std::vector ret; + HostVectorPtr generateHostList(const std::vector& hosts) { + HostVectorPtr ret(new std::vector()); for (size_t i = 0; i < hosts.size(); ++i) { const std::string zone = std::to_string(i); for (uint32_t j = 0; j < hosts[i]; ++j) { const std::string url = fmt::format("tcp://host.{}.{}:80", i, j); - ret.push_back(newTestHost(cluster_, url, 1, zone)); + ret->push_back(newTestHost(cluster_, url, 1, zone)); } } @@ -100,8 +119,8 @@ class DISABLED_SimulationTest : public testing::Test { * Generate hosts by zone. * @param hosts number of hosts per zone. */ - std::vector> generateHostsPerZone(const std::vector& hosts) { - std::vector> ret; + HostListsPtr generateHostsPerZone(const std::vector& hosts) { + HostListsPtr ret(new std::vector>()); for (size_t i = 0; i < hosts.size(); ++i) { const std::string zone = std::to_string(i); std::vector zone_hosts; @@ -111,21 +130,21 @@ class DISABLED_SimulationTest : public testing::Test { zone_hosts.push_back(newTestHost(cluster_, url, 1, zone)); } - ret.push_back(std::move(zone_hosts)); + ret->push_back(std::move(zone_hosts)); } return ret; }; - const uint32_t total_number_of_requests = 3000000; + const uint32_t total_number_of_requests = 300000; + std::vector empty_vector_; + HostSetImpl* local_host_set_; NiceMock cluster_; NiceMock runtime_; Runtime::RandomGeneratorImpl random_; Stats::IsolatedStoreImpl stats_store_; ClusterStats stats_; - // TODO: make per originating host load balancer. - RandomLoadBalancer lb_{cluster_, nullptr, stats_, runtime_, random_}; }; TEST_F(DISABLED_SimulationTest, strictlyEqualDistribution) { @@ -148,4 +167,12 @@ TEST_F(DISABLED_SimulationTest, unequalZoneDistribution4) { run({20U, 20U, 21U}, {4U, 4U, 5U}, {4U, 5U, 5U}); } +TEST_F(DISABLED_SimulationTest, unequalZoneDistribution5) { + run({3U, 2U, 5U}, {4U, 4U, 5U}, {4U, 5U, 5U}); +} + +TEST_F(DISABLED_SimulationTest, unequalZoneDistribution6) { + run({3U, 2U, 5U}, {3U, 4U, 5U}, {3U, 4U, 5U}); +} + } // Upstream \ No newline at end of file From db1df907c2811312d804493d766eb6af4ce8a54e Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Wed, 26 Oct 2016 16:41:42 -0700 Subject: [PATCH 02/13] more requests, fix number of hosts. --- .../common/upstream/load_balancer_simulation_test.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/common/upstream/load_balancer_simulation_test.cc b/test/common/upstream/load_balancer_simulation_test.cc index a6d88b940b1c..f206a2a2e61b 100644 --- a/test/common/upstream/load_balancer_simulation_test.cc +++ b/test/common/upstream/load_balancer_simulation_test.cc @@ -87,9 +87,11 @@ class DISABLED_SimulationTest : public testing::Test { hits[selected->url()]++; } + double mean = total_number_of_requests * 1.0 / hits.size(); for (const auto& host_hit_num_pair : hits) { - std::cout << fmt::format("url:{}, hits:{}", host_hit_num_pair.first, host_hit_num_pair.second) - << std::endl; + double percent_diff = std::abs((mean - host_hit_num_pair.second) / mean) * 100; + std::cout << fmt::format("url:{}, hits:{}, {} % from mean", host_hit_num_pair.first, + host_hit_num_pair.second, percent_diff) << std::endl; } } @@ -136,7 +138,7 @@ class DISABLED_SimulationTest : public testing::Test { return ret; }; - const uint32_t total_number_of_requests = 300000; + const uint32_t total_number_of_requests = 1000000; std::vector empty_vector_; HostSetImpl* local_host_set_; @@ -164,11 +166,11 @@ TEST_F(DISABLED_SimulationTest, unequalZoneDistribution3) { } TEST_F(DISABLED_SimulationTest, unequalZoneDistribution4) { - run({20U, 20U, 21U}, {4U, 4U, 5U}, {4U, 5U, 5U}); + run({20U, 20U, 21U}, {4U, 5U, 5U}, {4U, 5U, 5U}); } TEST_F(DISABLED_SimulationTest, unequalZoneDistribution5) { - run({3U, 2U, 5U}, {4U, 4U, 5U}, {4U, 5U, 5U}); + run({3U, 2U, 5U}, {4U, 5U, 5U}, {4U, 5U, 5U}); } TEST_F(DISABLED_SimulationTest, unequalZoneDistribution6) { From c83b5af060ace51864d8fec8b8bdec75ae8f39ee Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Wed, 26 Oct 2016 20:37:18 -0700 Subject: [PATCH 03/13] Add more tests around load balancing. --- .../upstream/load_balancer_impl_test.cc | 85 ++++++++++++++----- 1 file changed, 63 insertions(+), 22 deletions(-) diff --git a/test/common/upstream/load_balancer_impl_test.cc b/test/common/upstream/load_balancer_impl_test.cc index 1016f40f9176..ecb042b31ca7 100644 --- a/test/common/upstream/load_balancer_impl_test.cc +++ b/test/common/upstream/load_balancer_impl_test.cc @@ -86,32 +86,74 @@ TEST_F(RoundRobinLoadBalancerTest, MaxUnhealthyPanic) { } TEST_F(RoundRobinLoadBalancerTest, ZoneAwareSmallCluster) { - init(false); + init(true); + HostVectorPtr hosts( + new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")})); + HostListsPtr hosts_per_zone(new std::vector>( + {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}})); - cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}; - cluster_.hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}; - cluster_.healthy_hosts_per_zone_ = {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, - {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}, - {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}}; + cluster_.hosts_ = *hosts; + cluster_.healthy_hosts_ = *hosts; + cluster_.healthy_hosts_per_zone_ = *hosts_per_zone; + local_cluster_hosts_->updateHosts(hosts, hosts, hosts_per_zone, hosts_per_zone, + empty_host_vector_, empty_host_vector_); + EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) + .WillRepeatedly(Return(50)); EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) .WillRepeatedly(Return(true)); - EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.min_cluster_size", 6)) + EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6)) .WillRepeatedly(Return(6)); EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); EXPECT_EQ(1U, stats_.zone_cluster_too_small_.value()); EXPECT_EQ(cluster_.healthy_hosts_[1], lb_->chooseHost()); EXPECT_EQ(2U, stats_.zone_cluster_too_small_.value()); + EXPECT_EQ(cluster_.healthy_hosts_[2], lb_->chooseHost()); + EXPECT_EQ(3U, stats_.zone_cluster_too_small_.value()); + + EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6)) + .WillRepeatedly(Return(1)); + EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); + EXPECT_EQ(3U, stats_.zone_cluster_too_small_.value()); } -TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZone) { +TEST_F(RoundRobinLoadBalancerTest, NoZoneAwareDifferentZoneSize) { init(true); + HostVectorPtr hosts( + new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")})); + HostListsPtr upstream_hosts_per_zone(new std::vector>( + {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}})); + HostListsPtr local_hosts_per_zone(new std::vector>( + {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}})); + cluster_.healthy_hosts_ = *hosts; + cluster_.hosts_ = *hosts; + cluster_.healthy_hosts_per_zone_ = *upstream_hosts_per_zone; + local_cluster_hosts_->updateHosts(hosts, hosts, local_hosts_per_zone, local_hosts_per_zone, + empty_host_vector_, empty_host_vector_); + + EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) + .WillRepeatedly(Return(50)); + EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) + .WillRepeatedly(Return(true)); + EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6)) + .WillOnce(Return(1)); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); + EXPECT_EQ(1U, stats_.zone_number_differs_.value()); +} + +TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZoneSwitchOnOff) { + init(true); HostVectorPtr hosts( new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), @@ -149,7 +191,6 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZone) { TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingSmallZone) { init(true); - HostVectorPtr upstream_hosts( new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), @@ -199,21 +240,21 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingSmallZone) { TEST_F(RoundRobinLoadBalancerTest, NoZoneAwareRoutingOneZone) { init(true); + HostVectorPtr hosts( + new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")})); + HostListsPtr hosts_per_zone(new std::vector>( + {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}})); - cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}; - cluster_.hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}; - cluster_.healthy_hosts_per_zone_ = {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}}; - - EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)).Times(0); - EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) - .WillRepeatedly(Return(50)); - + cluster_.healthy_hosts_ = *hosts; + cluster_.hosts_ = *hosts; + cluster_.healthy_hosts_per_zone_ = *hosts_per_zone; + local_cluster_hosts_->updateHosts(hosts, hosts, hosts_per_zone, hosts_per_zone, + empty_host_vector_, empty_host_vector_); EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); } TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingNotHealthy) { init(true); - cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}; From fb39d7ae8c145051b339cf89829fe5b6db5c02e1 Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Thu, 27 Oct 2016 14:55:05 -0700 Subject: [PATCH 04/13] comments. --- include/envoy/upstream/upstream.h | 4 +- source/common/upstream/load_balancer_impl.cc | 68 +++++++++++-------- source/common/upstream/load_balancer_impl.h | 9 +-- .../upstream/load_balancer_impl_test.cc | 6 +- 4 files changed, 49 insertions(+), 38 deletions(-) diff --git a/include/envoy/upstream/upstream.h b/include/envoy/upstream/upstream.h index 3ce5f093fcf7..584eb510fc77 100644 --- a/include/envoy/upstream/upstream.h +++ b/include/envoy/upstream/upstream.h @@ -192,9 +192,9 @@ class HostSet { COUNTER(update_success) \ COUNTER(update_failure) \ COUNTER(zone_cluster_too_small) \ - COUNTER(zone_over_percentage) \ + COUNTER(zone_routing_all_directly) \ COUNTER(zone_routing_sampled) \ - COUNTER(zone_routing_no_sampled) \ + COUNTER(zone_routing_cross_zone) \ GAUGE (max_host_weight) \ COUNTER(local_cluster_not_ok) \ COUNTER(zone_number_differs) diff --git a/source/common/upstream/load_balancer_impl.cc b/source/common/upstream/load_balancer_impl.cc index 9d32939ef30b..20c55d306820 100644 --- a/source/common/upstream/load_balancer_impl.cc +++ b/source/common/upstream/load_balancer_impl.cc @@ -59,39 +59,38 @@ bool LoadBalancerBase::isGlobalPanic(const HostSet& host_set) { return false; } -std::vector -LoadBalancerBase::calculateZonePercentage(const std::vector>& hosts_per_zone) { - std::vector percentage(hosts_per_zone.size()); - +void LoadBalancerBase::calculateZonePercentage( + const std::vector>& hosts_per_zone, uint64_t* ret) { uint64_t total_hosts = 0; for (const auto& zone_hosts : hosts_per_zone) { total_hosts += zone_hosts.size(); } if (total_hosts != 0) { - size_t pos = 0; + size_t i = 0; for (const auto& zone_hosts : hosts_per_zone) { - percentage[pos++] = 10000ULL * zone_hosts.size() / total_hosts; + ret[i++] = 10000ULL * zone_hosts.size() / total_hosts; } } - - return percentage; } const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { // At this point it's guaranteed to be at least 2 zones. - ASSERT(host_set_.healthyHostsPerZone().size() >= 2U); + size_t number_of_zones = host_set_.healthyHostsPerZone().size(); + + ASSERT(number_of_zones >= 2U); - std::vector local_percentage = - calculateZonePercentage(local_host_set_->healthyHostsPerZone()); - std::vector upstream_percentage = - calculateZonePercentage(host_set_.healthyHostsPerZone()); + uint64_t local_percentage[number_of_zones]; + calculateZonePercentage(local_host_set_->healthyHostsPerZone(), local_percentage); + + uint64_t upstream_percentage[number_of_zones]; + calculateZonePercentage(host_set_.healthyHostsPerZone(), upstream_percentage); // Try to push all of the requests to the same zone first. // If we have lower percent of hosts in the local cluster in the same zone, // we can push all of the requests directly to upstream cluster in the same zone. if (upstream_percentage[0] >= local_percentage[0]) { - stats_.zone_over_percentage_.inc(); + stats_.zone_routing_all_directly_.inc(); return host_set_.healthyHostsPerZone()[0]; } @@ -104,33 +103,44 @@ const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { return host_set_.healthyHostsPerZone()[0]; } - // At this point we should route cross zone as we cannot route to the local zone. - stats_.zone_routing_no_sampled_.inc(); + // At this point we must route cross zone as we cannot route to the local zone. + stats_.zone_routing_cross_zone_.inc(); + + // Local zone does not have additional capacity (we have already routed what we could). + // Now we need to figure out how much traffic we can route cross zone and to which exact zone + // we should route. Percentage of requests routed cross zone to a specific zone should be + // proportional to the residual capacity upstream zone has. + // + // Residual_capacity contains capacity left in a given zone, we keep accumulating residual + // capacity to make search for sampled value easier. + uint64_t residual_capacity[number_of_zones]; - std::vector capacity_left; - // Local zone does not have additional capacity (we already routed what we could), but - // put it to the capacity_left so that index in the array matches to the zone index. - capacity_left.push_back(0); - for (size_t i = 1; i < local_percentage.size(); ++i) { + // Local zone (index 0) does not have residual capacity as we have routed all we could. + residual_capacity[0] = 0; + for (size_t i = 1; i < number_of_zones; ++i) { // Only route to the zones that have additional capacity. if (upstream_percentage[i] > local_percentage[i]) { - capacity_left.push_back(capacity_left[i - 1] + upstream_percentage[i] - local_percentage[i]); + residual_capacity[i] = + residual_capacity[i - 1] + upstream_percentage[i] - local_percentage[i]; } else { - capacity_left.push_back(capacity_left[i - 1]); + // Zone with index "i" does not have residual capacity, but we keep accumulating previous + // values to make search easier on the next step. + residual_capacity[i] = residual_capacity[i - 1]; } } - // Select specific zone for cross zone traffic based on the additional capacity in zones. - uint64_t threshold = random_.random() % capacity_left.back(); + // Random simulation to select specific zone for cross zone traffic based on the additional + // capacity in zones. + uint64_t threshold = random_.random() % residual_capacity[number_of_zones - 1]; // This potentially can be optimized to be O(log(N)) where N is the number of zones. // Linear scan should be faster for smaller N, in most of the scenarios N will be small. - int pos = 0; - while (threshold > capacity_left[pos]) { - pos++; + int i = 0; + while (threshold > residual_capacity[i]) { + i++; } - return host_set_.healthyHostsPerZone()[pos]; + return host_set_.healthyHostsPerZone()[i]; } const std::vector& LoadBalancerBase::hostsToUse() { diff --git a/source/common/upstream/load_balancer_impl.h b/source/common/upstream/load_balancer_impl.h index 42d3eea63823..e05a7b79e805 100644 --- a/source/common/upstream/load_balancer_impl.h +++ b/source/common/upstream/load_balancer_impl.h @@ -40,11 +40,12 @@ class LoadBalancerBase { const std::vector& tryChooseLocalZoneHosts(); /** - * @return ratio of hosts in a given zone to total number of hosts. The result is scaled by 10000 - * multiplier. + * @return ratio of hosts in a given zone to total number of hosts in ret param. + * The result is scaled by 10000 multiplier. + * Caller is responsible for allocation/de-allocation of ret. */ - std::vector - calculateZonePercentage(const std::vector>& hosts_per_zone); + void calculateZonePercentage(const std::vector>& hosts_per_zone, + uint64_t* ret); const HostSet& host_set_; const HostSet* local_host_set_; diff --git a/test/common/upstream/load_balancer_impl_test.cc b/test/common/upstream/load_balancer_impl_test.cc index ecb042b31ca7..d727b96c1845 100644 --- a/test/common/upstream/load_balancer_impl_test.cc +++ b/test/common/upstream/load_balancer_impl_test.cc @@ -179,9 +179,9 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZoneSwitchOnOff) { // There is only one host in the given zone for zone aware routing. EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); - EXPECT_EQ(1U, stats_.zone_over_percentage_.value()); + EXPECT_EQ(1U, stats_.zone_routing_all_directly_.value()); EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); - EXPECT_EQ(2U, stats_.zone_over_percentage_.value()); + EXPECT_EQ(2U, stats_.zone_routing_all_directly_.value()); // Disable runtime global zone routing. EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) @@ -235,7 +235,7 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingSmallZone) { // Force request out of small zone. EXPECT_CALL(random_, random()).WillOnce(Return(9999)).WillOnce(Return(2)); EXPECT_EQ(cluster_.healthy_hosts_per_zone_[1][1], lb_->chooseHost()); - EXPECT_EQ(1U, stats_.zone_routing_no_sampled_.value()); + EXPECT_EQ(1U, stats_.zone_routing_cross_zone_.value()); } TEST_F(RoundRobinLoadBalancerTest, NoZoneAwareRoutingOneZone) { From ab350590e5e73b96dafd010c4654c35ddd985ce9 Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Sun, 30 Oct 2016 15:23:12 -0700 Subject: [PATCH 05/13] Enable tests, it takes around 25sec to complete all those tests. --- .../upstream/load_balancer_simulation_test.cc | 33 +++++++------------ 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/test/common/upstream/load_balancer_simulation_test.cc b/test/common/upstream/load_balancer_simulation_test.cc index f206a2a2e61b..4994b334da2e 100644 --- a/test/common/upstream/load_balancer_simulation_test.cc +++ b/test/common/upstream/load_balancer_simulation_test.cc @@ -16,11 +16,11 @@ static HostPtr newTestHost(const Upstream::Cluster& cluster, const std::string& } /** - * This test is for simulation only and should not be run as part of unit tests. + * Simulation test for zone aware routing. */ -class DISABLED_SimulationTest : public testing::Test { +class SimulationTest : public testing::Test { public: - DISABLED_SimulationTest() : stats_(ClusterImplBase::generateStats("", stats_store_)) { + SimulationTest() : stats_(ClusterImplBase::generateStats("", stats_store_)) { ON_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50U)) .WillByDefault(Return(50U)); ON_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) @@ -90,6 +90,7 @@ class DISABLED_SimulationTest : public testing::Test { double mean = total_number_of_requests * 1.0 / hits.size(); for (const auto& host_hit_num_pair : hits) { double percent_diff = std::abs((mean - host_hit_num_pair.second) / mean) * 100; + EXPECT_TRUE(2.0 >= percent_diff); std::cout << fmt::format("url:{}, hits:{}, {} % from mean", host_hit_num_pair.first, host_hit_num_pair.second, percent_diff) << std::endl; } @@ -138,7 +139,7 @@ class DISABLED_SimulationTest : public testing::Test { return ret; }; - const uint32_t total_number_of_requests = 1000000; + const uint32_t total_number_of_requests = 600000; std::vector empty_vector_; HostSetImpl* local_host_set_; @@ -149,32 +150,22 @@ class DISABLED_SimulationTest : public testing::Test { ClusterStats stats_; }; -TEST_F(DISABLED_SimulationTest, strictlyEqualDistribution) { - run({1U, 1U, 1U}, {3U, 3U, 3U}, {3U, 3U, 3U}); -} +TEST_F(SimulationTest, strictlyEqualDistribution) { run({1U, 1U, 1U}, {3U, 3U, 3U}, {3U, 3U, 3U}); } -TEST_F(DISABLED_SimulationTest, unequalZoneDistribution) { - run({1U, 1U, 1U}, {2U, 5U, 5U}, {2U, 5U, 5U}); -} +TEST_F(SimulationTest, unequalZoneDistribution) { run({1U, 1U, 1U}, {2U, 5U, 5U}, {2U, 5U, 5U}); } -TEST_F(DISABLED_SimulationTest, unequalZoneDistribution2) { - run({1U, 1U, 1U}, {5U, 5U, 6U}, {5U, 5U, 6U}); -} +TEST_F(SimulationTest, unequalZoneDistribution2) { run({1U, 1U, 1U}, {5U, 5U, 6U}, {5U, 5U, 6U}); } -TEST_F(DISABLED_SimulationTest, unequalZoneDistribution3) { +TEST_F(SimulationTest, unequalZoneDistribution3) { run({1U, 1U, 1U}, {10U, 10U, 10U}, {10U, 8U, 8U}); } -TEST_F(DISABLED_SimulationTest, unequalZoneDistribution4) { +TEST_F(SimulationTest, unequalZoneDistribution4) { run({20U, 20U, 21U}, {4U, 5U, 5U}, {4U, 5U, 5U}); } -TEST_F(DISABLED_SimulationTest, unequalZoneDistribution5) { - run({3U, 2U, 5U}, {4U, 5U, 5U}, {4U, 5U, 5U}); -} +TEST_F(SimulationTest, unequalZoneDistribution5) { run({3U, 2U, 5U}, {4U, 5U, 5U}, {4U, 5U, 5U}); } -TEST_F(DISABLED_SimulationTest, unequalZoneDistribution6) { - run({3U, 2U, 5U}, {3U, 4U, 5U}, {3U, 4U, 5U}); -} +TEST_F(SimulationTest, unequalZoneDistribution6) { run({3U, 2U, 5U}, {3U, 4U, 5U}, {3U, 4U, 5U}); } } // Upstream \ No newline at end of file From c7710ed2d01034c788e521a09497493fd7c8aeb3 Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Sun, 30 Oct 2016 16:29:09 -0700 Subject: [PATCH 06/13] make 3% as we do not run through that many requests. --- test/common/upstream/load_balancer_simulation_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/common/upstream/load_balancer_simulation_test.cc b/test/common/upstream/load_balancer_simulation_test.cc index 4994b334da2e..771d509abf8a 100644 --- a/test/common/upstream/load_balancer_simulation_test.cc +++ b/test/common/upstream/load_balancer_simulation_test.cc @@ -90,7 +90,7 @@ class SimulationTest : public testing::Test { double mean = total_number_of_requests * 1.0 / hits.size(); for (const auto& host_hit_num_pair : hits) { double percent_diff = std::abs((mean - host_hit_num_pair.second) / mean) * 100; - EXPECT_TRUE(2.0 >= percent_diff); + EXPECT_TRUE(3.0 >= percent_diff); std::cout << fmt::format("url:{}, hits:{}, {} % from mean", host_hit_num_pair.first, host_hit_num_pair.second, percent_diff) << std::endl; } From 82f2696aa6410845dd7dc4e92a8bd7a268ca1054 Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Mon, 31 Oct 2016 10:59:24 -0700 Subject: [PATCH 07/13] Revert "Enable tests, it takes around 25sec to complete all those tests." This reverts commit ab350590e5e73b96dafd010c4654c35ddd985ce9. Conflicts: test/common/upstream/load_balancer_simulation_test.cc --- .../upstream/load_balancer_simulation_test.cc | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/test/common/upstream/load_balancer_simulation_test.cc b/test/common/upstream/load_balancer_simulation_test.cc index 771d509abf8a..f206a2a2e61b 100644 --- a/test/common/upstream/load_balancer_simulation_test.cc +++ b/test/common/upstream/load_balancer_simulation_test.cc @@ -16,11 +16,11 @@ static HostPtr newTestHost(const Upstream::Cluster& cluster, const std::string& } /** - * Simulation test for zone aware routing. + * This test is for simulation only and should not be run as part of unit tests. */ -class SimulationTest : public testing::Test { +class DISABLED_SimulationTest : public testing::Test { public: - SimulationTest() : stats_(ClusterImplBase::generateStats("", stats_store_)) { + DISABLED_SimulationTest() : stats_(ClusterImplBase::generateStats("", stats_store_)) { ON_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50U)) .WillByDefault(Return(50U)); ON_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) @@ -90,7 +90,6 @@ class SimulationTest : public testing::Test { double mean = total_number_of_requests * 1.0 / hits.size(); for (const auto& host_hit_num_pair : hits) { double percent_diff = std::abs((mean - host_hit_num_pair.second) / mean) * 100; - EXPECT_TRUE(3.0 >= percent_diff); std::cout << fmt::format("url:{}, hits:{}, {} % from mean", host_hit_num_pair.first, host_hit_num_pair.second, percent_diff) << std::endl; } @@ -139,7 +138,7 @@ class SimulationTest : public testing::Test { return ret; }; - const uint32_t total_number_of_requests = 600000; + const uint32_t total_number_of_requests = 1000000; std::vector empty_vector_; HostSetImpl* local_host_set_; @@ -150,22 +149,32 @@ class SimulationTest : public testing::Test { ClusterStats stats_; }; -TEST_F(SimulationTest, strictlyEqualDistribution) { run({1U, 1U, 1U}, {3U, 3U, 3U}, {3U, 3U, 3U}); } +TEST_F(DISABLED_SimulationTest, strictlyEqualDistribution) { + run({1U, 1U, 1U}, {3U, 3U, 3U}, {3U, 3U, 3U}); +} -TEST_F(SimulationTest, unequalZoneDistribution) { run({1U, 1U, 1U}, {2U, 5U, 5U}, {2U, 5U, 5U}); } +TEST_F(DISABLED_SimulationTest, unequalZoneDistribution) { + run({1U, 1U, 1U}, {2U, 5U, 5U}, {2U, 5U, 5U}); +} -TEST_F(SimulationTest, unequalZoneDistribution2) { run({1U, 1U, 1U}, {5U, 5U, 6U}, {5U, 5U, 6U}); } +TEST_F(DISABLED_SimulationTest, unequalZoneDistribution2) { + run({1U, 1U, 1U}, {5U, 5U, 6U}, {5U, 5U, 6U}); +} -TEST_F(SimulationTest, unequalZoneDistribution3) { +TEST_F(DISABLED_SimulationTest, unequalZoneDistribution3) { run({1U, 1U, 1U}, {10U, 10U, 10U}, {10U, 8U, 8U}); } -TEST_F(SimulationTest, unequalZoneDistribution4) { +TEST_F(DISABLED_SimulationTest, unequalZoneDistribution4) { run({20U, 20U, 21U}, {4U, 5U, 5U}, {4U, 5U, 5U}); } -TEST_F(SimulationTest, unequalZoneDistribution5) { run({3U, 2U, 5U}, {4U, 5U, 5U}, {4U, 5U, 5U}); } +TEST_F(DISABLED_SimulationTest, unequalZoneDistribution5) { + run({3U, 2U, 5U}, {4U, 5U, 5U}, {4U, 5U, 5U}); +} -TEST_F(SimulationTest, unequalZoneDistribution6) { run({3U, 2U, 5U}, {3U, 4U, 5U}, {3U, 4U, 5U}); } +TEST_F(DISABLED_SimulationTest, unequalZoneDistribution6) { + run({3U, 2U, 5U}, {3U, 4U, 5U}, {3U, 4U, 5U}); +} } // Upstream \ No newline at end of file From 879445df94f1bff98abf74a8d133125b72fa2956 Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Mon, 31 Oct 2016 11:34:14 -0700 Subject: [PATCH 08/13] more comments on multiplier and method. --- source/common/upstream/load_balancer_impl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/common/upstream/load_balancer_impl.h b/source/common/upstream/load_balancer_impl.h index e05a7b79e805..5fdf1d2a6665 100644 --- a/source/common/upstream/load_balancer_impl.h +++ b/source/common/upstream/load_balancer_impl.h @@ -40,8 +40,8 @@ class LoadBalancerBase { const std::vector& tryChooseLocalZoneHosts(); /** - * @return ratio of hosts in a given zone to total number of hosts in ret param. - * The result is scaled by 10000 multiplier. + * @return (number of hosts in a given zone)/(total number of hosts) in ret param. + * The result is stored as integer number and scaled by 10000 multiplier for better precision. * Caller is responsible for allocation/de-allocation of ret. */ void calculateZonePercentage(const std::vector>& hosts_per_zone, From c756172e02877e0333f1c3d90251c14ffa22dff1 Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Mon, 31 Oct 2016 12:01:54 -0700 Subject: [PATCH 09/13] fix wording. --- source/common/upstream/load_balancer_impl.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/common/upstream/load_balancer_impl.cc b/source/common/upstream/load_balancer_impl.cc index 20c55d306820..03a417d99233 100644 --- a/source/common/upstream/load_balancer_impl.cc +++ b/source/common/upstream/load_balancer_impl.cc @@ -108,10 +108,10 @@ const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { // Local zone does not have additional capacity (we have already routed what we could). // Now we need to figure out how much traffic we can route cross zone and to which exact zone - // we should route. Percentage of requests routed cross zone to a specific zone should be + // we should route. Percentage of requests routed cross zone to a specific zone needed be // proportional to the residual capacity upstream zone has. // - // Residual_capacity contains capacity left in a given zone, we keep accumulating residual + // residual_capacity contains capacity left in a given zone, we keep accumulating residual // capacity to make search for sampled value easier. uint64_t residual_capacity[number_of_zones]; @@ -129,7 +129,7 @@ const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { } } - // Random simulation to select specific zone for cross zone traffic based on the additional + // Random sampling to select specific zone for cross zone traffic based on the additional // capacity in zones. uint64_t threshold = random_.random() % residual_capacity[number_of_zones - 1]; From f431d663dcd556884c77e1e3633f627db7420136 Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Mon, 31 Oct 2016 14:08:47 -0700 Subject: [PATCH 10/13] comments. --- include/envoy/upstream/upstream.h | 18 +++++++------- source/common/upstream/load_balancer_impl.cc | 24 +++++++++++++------ source/common/upstream/load_balancer_impl.h | 4 ++++ .../upstream/load_balancer_impl_test.cc | 20 ++++++++-------- 4 files changed, 40 insertions(+), 26 deletions(-) diff --git a/include/envoy/upstream/upstream.h b/include/envoy/upstream/upstream.h index 584eb510fc77..f57a73b2c5ce 100644 --- a/include/envoy/upstream/upstream.h +++ b/include/envoy/upstream/upstream.h @@ -185,19 +185,19 @@ class HostSet { COUNTER(upstream_rq_retry) \ COUNTER(upstream_rq_retry_success) \ COUNTER(upstream_rq_retry_overflow) \ - COUNTER(upstream_rq_lb_healthy_panic) \ + COUNTER(lb_healthy_panic) \ + COUNTER(lb_local_cluster_not_ok) \ + COUNTER(lb_zone_cluster_too_small) \ + COUNTER(lb_zone_number_differs) \ + COUNTER(lb_zone_routing_all_directly) \ + COUNTER(lb_zone_routing_sampled) \ + COUNTER(lb_zone_routing_cross_zone) \ + GAUGE (max_host_weight) \ COUNTER(membership_change) \ GAUGE (membership_total) \ COUNTER(update_attempt) \ COUNTER(update_success) \ - COUNTER(update_failure) \ - COUNTER(zone_cluster_too_small) \ - COUNTER(zone_routing_all_directly) \ - COUNTER(zone_routing_sampled) \ - COUNTER(zone_routing_cross_zone) \ - GAUGE (max_host_weight) \ - COUNTER(local_cluster_not_ok) \ - COUNTER(zone_number_differs) + COUNTER(update_failure) // clang-format on /** diff --git a/source/common/upstream/load_balancer_impl.cc b/source/common/upstream/load_balancer_impl.cc index 03a417d99233..32f45ab6bbd6 100644 --- a/source/common/upstream/load_balancer_impl.cc +++ b/source/common/upstream/load_balancer_impl.cc @@ -25,20 +25,20 @@ bool LoadBalancerBase::earlyExitNonZoneRouting() { runtime_.snapshot().getInteger("upstream.zone_routing.min_cluster_size", 6U); if (host_set_.healthyHosts().size() < min_cluster_size) { - stats_.zone_cluster_too_small_.inc(); + stats_.lb_zone_cluster_too_small_.inc(); return true; } // If local cluster is not set, or we are in panic mode for it. if (local_host_set_ == nullptr || local_host_set_->hosts().empty() || isGlobalPanic(*local_host_set_)) { - stats_.local_cluster_not_ok_.inc(); + stats_.lb_local_cluster_not_ok_.inc(); return true; } // Same number of zones should be for local and upstream cluster. if (host_set_.healthyHostsPerZone().size() != local_host_set_->healthyHostsPerZone().size()) { - stats_.zone_number_differs_.inc(); + stats_.lb_zone_number_differs_.inc(); return true; } @@ -52,7 +52,7 @@ bool LoadBalancerBase::isGlobalPanic(const HostSet& host_set) { // If the % of healthy hosts in the cluster is less than our panic threshold, we use all hosts. if (healthy_percent < global_panic_threshold) { - stats_.upstream_rq_lb_healthy_panic_.inc(); + stats_.lb_healthy_panic_.inc(); return true; } @@ -79,6 +79,7 @@ const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { size_t number_of_zones = host_set_.healthyHostsPerZone().size(); ASSERT(number_of_zones >= 2U); + ASSERT(local_host_set_->healthyHostsPerZone().size == host_set_.healthyHostsPerZone().size()); uint64_t local_percentage[number_of_zones]; calculateZonePercentage(local_host_set_->healthyHostsPerZone(), local_percentage); @@ -90,7 +91,7 @@ const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { // If we have lower percent of hosts in the local cluster in the same zone, // we can push all of the requests directly to upstream cluster in the same zone. if (upstream_percentage[0] >= local_percentage[0]) { - stats_.zone_routing_all_directly_.inc(); + stats_.lb_zone_routing_all_directly_.inc(); return host_set_.healthyHostsPerZone()[0]; } @@ -99,12 +100,12 @@ const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { // we can route only 50% of requests directly. uint64_t local_percent_route = upstream_percentage[0] * 10000 / local_percentage[0]; if (random_.random() % 10000 < local_percent_route) { - stats_.zone_routing_sampled_.inc(); + stats_.lb_zone_routing_sampled_.inc(); return host_set_.healthyHostsPerZone()[0]; } // At this point we must route cross zone as we cannot route to the local zone. - stats_.zone_routing_cross_zone_.inc(); + stats_.lb_zone_routing_cross_zone_.inc(); // Local zone does not have additional capacity (we have already routed what we could). // Now we need to figure out how much traffic we can route cross zone and to which exact zone @@ -113,6 +114,15 @@ const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { // // residual_capacity contains capacity left in a given zone, we keep accumulating residual // capacity to make search for sampled value easier. + // For example, if we have the following upstream and local percentage: + // local_percentage: 40000 40000 20000 + // upstream_percentage: 25000 50000 25000 + // Residual capacity would look like: 0 10000 5000. Now we need to sample proportionally to + // bucket sizes (residual capacity). For simplicity of finding where specific + // sampled value is, we accumulate values in residual capacity. This is what it will look like: + // residual_capacity: 0 10000 15000 + // Now to find zone to route (bucket) we could simply iterate over residual_capacity searching where + // sampled value is placed. uint64_t residual_capacity[number_of_zones]; // Local zone (index 0) does not have residual capacity as we have routed all we could. diff --git a/source/common/upstream/load_balancer_impl.h b/source/common/upstream/load_balancer_impl.h index 5fdf1d2a6665..265056f060e6 100644 --- a/source/common/upstream/load_balancer_impl.h +++ b/source/common/upstream/load_balancer_impl.h @@ -37,6 +37,10 @@ class LoadBalancerBase { * In this case we'll route requests to hosts no matter if they are healthy or not. */ bool isGlobalPanic(const HostSet& host_set); + + /** + * Try to select upstream hosts from the same zone. + */ const std::vector& tryChooseLocalZoneHosts(); /** diff --git a/test/common/upstream/load_balancer_impl_test.cc b/test/common/upstream/load_balancer_impl_test.cc index d727b96c1845..3011b90c6252 100644 --- a/test/common/upstream/load_balancer_impl_test.cc +++ b/test/common/upstream/load_balancer_impl_test.cc @@ -82,7 +82,7 @@ TEST_F(RoundRobinLoadBalancerTest, MaxUnhealthyPanic) { EXPECT_EQ(cluster_.healthy_hosts_[3], lb_->chooseHost()); EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); - EXPECT_EQ(3UL, stats_.upstream_rq_lb_healthy_panic_.value()); + EXPECT_EQ(3UL, stats_.lb_healthy_panic_.value()); } TEST_F(RoundRobinLoadBalancerTest, ZoneAwareSmallCluster) { @@ -110,16 +110,16 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareSmallCluster) { .WillRepeatedly(Return(6)); EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); - EXPECT_EQ(1U, stats_.zone_cluster_too_small_.value()); + EXPECT_EQ(1U, stats_.lb_zone_cluster_too_small_.value()); EXPECT_EQ(cluster_.healthy_hosts_[1], lb_->chooseHost()); - EXPECT_EQ(2U, stats_.zone_cluster_too_small_.value()); + EXPECT_EQ(2U, stats_.lb_zone_cluster_too_small_.value()); EXPECT_EQ(cluster_.healthy_hosts_[2], lb_->chooseHost()); - EXPECT_EQ(3U, stats_.zone_cluster_too_small_.value()); + EXPECT_EQ(3U, stats_.lb_zone_cluster_too_small_.value()); EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6)) .WillRepeatedly(Return(1)); EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); - EXPECT_EQ(3U, stats_.zone_cluster_too_small_.value()); + EXPECT_EQ(3U, stats_.lb_zone_cluster_too_small_.value()); } TEST_F(RoundRobinLoadBalancerTest, NoZoneAwareDifferentZoneSize) { @@ -149,7 +149,7 @@ TEST_F(RoundRobinLoadBalancerTest, NoZoneAwareDifferentZoneSize) { EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6)) .WillOnce(Return(1)); EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); - EXPECT_EQ(1U, stats_.zone_number_differs_.value()); + EXPECT_EQ(1U, stats_.lb_zone_number_differs_.value()); } TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZoneSwitchOnOff) { @@ -179,9 +179,9 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZoneSwitchOnOff) { // There is only one host in the given zone for zone aware routing. EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); - EXPECT_EQ(1U, stats_.zone_routing_all_directly_.value()); + EXPECT_EQ(1U, stats_.lb_zone_routing_all_directly_.value()); EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); - EXPECT_EQ(2U, stats_.zone_routing_all_directly_.value()); + EXPECT_EQ(2U, stats_.lb_zone_routing_all_directly_.value()); // Disable runtime global zone routing. EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) @@ -231,11 +231,11 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingSmallZone) { // There is only one host in the given zone for zone aware routing. EXPECT_CALL(random_, random()).WillOnce(Return(100)); EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); - EXPECT_EQ(1U, stats_.zone_routing_sampled_.value()); + EXPECT_EQ(1U, stats_.lb_zone_routing_sampled_.value()); // Force request out of small zone. EXPECT_CALL(random_, random()).WillOnce(Return(9999)).WillOnce(Return(2)); EXPECT_EQ(cluster_.healthy_hosts_per_zone_[1][1], lb_->chooseHost()); - EXPECT_EQ(1U, stats_.zone_routing_cross_zone_.value()); + EXPECT_EQ(1U, stats_.lb_zone_routing_cross_zone_.value()); } TEST_F(RoundRobinLoadBalancerTest, NoZoneAwareRoutingOneZone) { From 193f2155cd0cd2ac6ef910b9cd96ac57637d7da4 Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Mon, 31 Oct 2016 14:09:58 -0700 Subject: [PATCH 11/13] sort. --- include/envoy/upstream/upstream.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/envoy/upstream/upstream.h b/include/envoy/upstream/upstream.h index f57a73b2c5ce..11cbe8928bf5 100644 --- a/include/envoy/upstream/upstream.h +++ b/include/envoy/upstream/upstream.h @@ -148,6 +148,13 @@ class HostSet { */ // clang-format off #define ALL_CLUSTER_STATS(COUNTER, GAUGE, TIMER) \ + COUNTER(lb_healthy_panic) \ + COUNTER(lb_local_cluster_not_ok) \ + COUNTER(lb_zone_cluster_too_small) \ + COUNTER(lb_zone_number_differs) \ + COUNTER(lb_zone_routing_all_directly) \ + COUNTER(lb_zone_routing_sampled) \ + COUNTER(lb_zone_routing_cross_zone) \ COUNTER(upstream_cx_total) \ GAUGE (upstream_cx_active) \ COUNTER(upstream_cx_http1_total) \ @@ -185,13 +192,6 @@ class HostSet { COUNTER(upstream_rq_retry) \ COUNTER(upstream_rq_retry_success) \ COUNTER(upstream_rq_retry_overflow) \ - COUNTER(lb_healthy_panic) \ - COUNTER(lb_local_cluster_not_ok) \ - COUNTER(lb_zone_cluster_too_small) \ - COUNTER(lb_zone_number_differs) \ - COUNTER(lb_zone_routing_all_directly) \ - COUNTER(lb_zone_routing_sampled) \ - COUNTER(lb_zone_routing_cross_zone) \ GAUGE (max_host_weight) \ COUNTER(membership_change) \ GAUGE (membership_total) \ From e49231af506e3e08958d2c9a74efecb31835157d Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Mon, 31 Oct 2016 14:14:45 -0700 Subject: [PATCH 12/13] fix_format. --- source/common/upstream/load_balancer_impl.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/common/upstream/load_balancer_impl.cc b/source/common/upstream/load_balancer_impl.cc index 32f45ab6bbd6..eaafec9d0f78 100644 --- a/source/common/upstream/load_balancer_impl.cc +++ b/source/common/upstream/load_balancer_impl.cc @@ -121,7 +121,8 @@ const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { // bucket sizes (residual capacity). For simplicity of finding where specific // sampled value is, we accumulate values in residual capacity. This is what it will look like: // residual_capacity: 0 10000 15000 - // Now to find zone to route (bucket) we could simply iterate over residual_capacity searching where + // Now to find zone to route (bucket) we could simply iterate over residual_capacity searching + // where // sampled value is placed. uint64_t residual_capacity[number_of_zones]; From 90067cf23ad0aa550e414efa56f189ef76385539 Mon Sep 17 00:00:00 2001 From: Roman Dzhabarov Date: Mon, 31 Oct 2016 14:24:43 -0700 Subject: [PATCH 13/13] line wrapping. --- source/common/upstream/load_balancer_impl.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/source/common/upstream/load_balancer_impl.cc b/source/common/upstream/load_balancer_impl.cc index eaafec9d0f78..69923ac7c464 100644 --- a/source/common/upstream/load_balancer_impl.cc +++ b/source/common/upstream/load_balancer_impl.cc @@ -121,9 +121,8 @@ const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { // bucket sizes (residual capacity). For simplicity of finding where specific // sampled value is, we accumulate values in residual capacity. This is what it will look like: // residual_capacity: 0 10000 15000 - // Now to find zone to route (bucket) we could simply iterate over residual_capacity searching - // where - // sampled value is placed. + // Now to find a zone to route (bucket) we could simply iterate over residual_capacity searching + // where sampled value is placed. uint64_t residual_capacity[number_of_zones]; // Local zone (index 0) does not have residual capacity as we have routed all we could.