diff --git a/include/envoy/upstream/upstream.h b/include/envoy/upstream/upstream.h index 33251a0f0c18..11cbe8928bf5 100644 --- a/include/envoy/upstream/upstream.h +++ b/include/envoy/upstream/upstream.h @@ -148,6 +148,13 @@ class HostSet { */ // clang-format off #define ALL_CLUSTER_STATS(COUNTER, GAUGE, TIMER) \ + COUNTER(lb_healthy_panic) \ + COUNTER(lb_local_cluster_not_ok) \ + COUNTER(lb_zone_cluster_too_small) \ + COUNTER(lb_zone_number_differs) \ + COUNTER(lb_zone_routing_all_directly) \ + COUNTER(lb_zone_routing_sampled) \ + COUNTER(lb_zone_routing_cross_zone) \ COUNTER(upstream_cx_total) \ GAUGE (upstream_cx_active) \ COUNTER(upstream_cx_http1_total) \ @@ -185,17 +192,12 @@ class HostSet { COUNTER(upstream_rq_retry) \ COUNTER(upstream_rq_retry_success) \ COUNTER(upstream_rq_retry_overflow) \ - COUNTER(upstream_rq_lb_healthy_panic) \ + GAUGE (max_host_weight) \ COUNTER(membership_change) \ GAUGE (membership_total) \ COUNTER(update_attempt) \ COUNTER(update_success) \ - COUNTER(update_failure) \ - COUNTER(zone_cluster_too_small) \ - COUNTER(zone_over_percentage) \ - COUNTER(zone_routing_sampled) \ - COUNTER(zone_routing_no_sampled) \ - GAUGE (max_host_weight) + COUNTER(update_failure) // clang-format on /** diff --git a/source/common/upstream/load_balancer_impl.cc b/source/common/upstream/load_balancer_impl.cc index b27125b425c3..69923ac7c464 100644 --- a/source/common/upstream/load_balancer_impl.cc +++ b/source/common/upstream/load_balancer_impl.cc @@ -25,69 +25,146 @@ bool LoadBalancerBase::earlyExitNonZoneRouting() { runtime_.snapshot().getInteger("upstream.zone_routing.min_cluster_size", 6U); if (host_set_.healthyHosts().size() < min_cluster_size) { - stats_.zone_cluster_too_small_.inc(); + stats_.lb_zone_cluster_too_small_.inc(); + return true; + } + + // If local cluster is not set, or we are in panic mode for it. 
+ if (local_host_set_ == nullptr || local_host_set_->hosts().empty() || + isGlobalPanic(*local_host_set_)) { + stats_.lb_local_cluster_not_ok_.inc(); + return true; + } + + // Same number of zones should be for local and upstream cluster. + if (host_set_.healthyHostsPerZone().size() != local_host_set_->healthyHostsPerZone().size()) { + stats_.lb_zone_number_differs_.inc(); return true; } return false; } -bool LoadBalancerBase::isGlobalPanic() { +bool LoadBalancerBase::isGlobalPanic(const HostSet& host_set) { uint64_t global_panic_threshold = std::min(100UL, runtime_.snapshot().getInteger("upstream.healthy_panic_threshold", 50)); - double healthy_percent = 100.0 * host_set_.healthyHosts().size() / host_set_.hosts().size(); + double healthy_percent = 100.0 * host_set.healthyHosts().size() / host_set.hosts().size(); // If the % of healthy hosts in the cluster is less than our panic threshold, we use all hosts. if (healthy_percent < global_panic_threshold) { - stats_.upstream_rq_lb_healthy_panic_.inc(); + stats_.lb_healthy_panic_.inc(); return true; } return false; } -const std::vector& LoadBalancerBase::hostsToUse() { - ASSERT(host_set_.healthyHosts().size() <= host_set_.hosts().size()); - - if (host_set_.hosts().empty() || isGlobalPanic()) { - return host_set_.hosts(); +void LoadBalancerBase::calculateZonePercentage( + const std::vector>& hosts_per_zone, uint64_t* ret) { + uint64_t total_hosts = 0; + for (const auto& zone_hosts : hosts_per_zone) { + total_hosts += zone_hosts.size(); } - if (earlyExitNonZoneRouting()) { - return host_set_.healthyHosts(); + if (total_hosts != 0) { + size_t i = 0; + for (const auto& zone_hosts : hosts_per_zone) { + ret[i++] = 10000ULL * zone_hosts.size() / total_hosts; + } } +} +const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { // At this point it's guaranteed to be at least 2 zones. 
- uint32_t number_of_zones = host_set_.healthyHostsPerZone().size(); + size_t number_of_zones = host_set_.healthyHostsPerZone().size(); + ASSERT(number_of_zones >= 2U); - const std::vector& local_zone_healthy_hosts = host_set_.healthyHostsPerZone()[0]; + ASSERT(local_host_set_->healthyHostsPerZone().size == host_set_.healthyHostsPerZone().size()); + + uint64_t local_percentage[number_of_zones]; + calculateZonePercentage(local_host_set_->healthyHostsPerZone(), local_percentage); - // If number of hosts in a local zone big enough then route all requests to the same zone. - if (local_zone_healthy_hosts.size() * number_of_zones >= host_set_.healthyHosts().size()) { - stats_.zone_over_percentage_.inc(); - return local_zone_healthy_hosts; + uint64_t upstream_percentage[number_of_zones]; + calculateZonePercentage(host_set_.healthyHostsPerZone(), upstream_percentage); + + // Try to push all of the requests to the same zone first. + // If we have lower percent of hosts in the local cluster in the same zone, + // we can push all of the requests directly to upstream cluster in the same zone. + if (upstream_percentage[0] >= local_percentage[0]) { + stats_.lb_zone_routing_all_directly_.inc(); + return host_set_.healthyHostsPerZone()[0]; } - // If local zone ratio is lower than expected we should only partially route requests from the - // same zone. - double zone_host_ratio = 1.0 * local_zone_healthy_hosts.size() / host_set_.healthyHosts().size(); - double ratio_to_route = zone_host_ratio * number_of_zones; + // If we cannot route all requests to the same zone, calculate what percentage can be routed. + // For example, if local percentage is 20% and upstream is 10% + // we can route only 50% of requests directly. 
+ uint64_t local_percent_route = upstream_percentage[0] * 10000 / local_percentage[0]; + if (random_.random() % 10000 < local_percent_route) { + stats_.lb_zone_routing_sampled_.inc(); + return host_set_.healthyHostsPerZone()[0]; + } - // Not zone routed requests will be distributed between all hosts and hence - // we need to route only fraction of req_percent_to_route to the local zone. - double actual_routing_ratio = (ratio_to_route - zone_host_ratio) / (1 - zone_host_ratio); + // At this point we must route cross zone as we cannot route to the local zone. + stats_.lb_zone_routing_cross_zone_.inc(); + + // Local zone does not have additional capacity (we have already routed what we could). + // Now we need to figure out how much traffic we can route cross zone and to which exact zone + // we should route. Percentage of requests routed cross zone to a specific zone needs to be + // proportional to the residual capacity that upstream zone has. + // + // residual_capacity contains the capacity left in a given zone; we keep accumulating residual + // capacity to make the search for the sampled value easier. + // For example, if we have the following upstream and local percentages (scaled by 10000): + // local_percentage: 4000 4000 2000 + // upstream_percentage: 2500 5000 2500 + // Residual capacity would look like: 0 1000 500. Now we need to sample proportionally to + // bucket sizes (residual capacity). For simplicity of finding where a specific + // sampled value is, we accumulate values in residual capacity. This is what it will look like: + // residual_capacity: 0 1000 1500 + // Now to find a zone to route to (bucket) we can simply iterate over residual_capacity searching + // for where the sampled value is placed. + uint64_t residual_capacity[number_of_zones]; + + // Local zone (index 0) does not have residual capacity as we have routed all we could. + residual_capacity[0] = 0; + for (size_t i = 1; i < number_of_zones; ++i) { + // Only route to the zones that have additional capacity. 
+ if (upstream_percentage[i] > local_percentage[i]) { + residual_capacity[i] = + residual_capacity[i - 1] + upstream_percentage[i] - local_percentage[i]; + } else { + // Zone with index "i" does not have residual capacity, but we keep accumulating previous + // values to make search easier on the next step. + residual_capacity[i] = residual_capacity[i - 1]; + } + } - // Scale actual_routing_ratio to improve precision. - const uint64_t scale_factor = 10000; - uint64_t zone_routing_threshold = scale_factor * actual_routing_ratio; + // Random sampling to select a specific zone for cross zone traffic based on the additional + // capacity in zones. NOTE(review): the divisor below is 0 if no other zone has residual capacity - confirm this cannot happen. + uint64_t threshold = random_.random() % residual_capacity[number_of_zones - 1]; - if (random_.random() % 10000 < zone_routing_threshold) { - stats_.zone_routing_sampled_.inc(); - return local_zone_healthy_hosts; - } else { - stats_.zone_routing_no_sampled_.inc(); + // This potentially can be optimized to be O(log(N)) where N is the number of zones. + // Linear scan should be faster for smaller N, in most of the scenarios N will be small. NOTE(review): a threshold of 0 stops the scan at index 0 (the local zone) - confirm whether `>` should be `>=`. 
+ int i = 0; + while (threshold > residual_capacity[i]) { + i++; + } + + return host_set_.healthyHostsPerZone()[i]; +} + +const std::vector& LoadBalancerBase::hostsToUse() { + ASSERT(host_set_.healthyHosts().size() <= host_set_.hosts().size()); + + if (host_set_.hosts().empty() || isGlobalPanic(host_set_)) { + return host_set_.hosts(); + } + + if (earlyExitNonZoneRouting()) { return host_set_.healthyHosts(); } + + return tryChooseLocalZoneHosts(); } ConstHostPtr RoundRobinLoadBalancer::chooseHost() { diff --git a/source/common/upstream/load_balancer_impl.h b/source/common/upstream/load_balancer_impl.h index b2bc09987641..265056f060e6 100644 --- a/source/common/upstream/load_balancer_impl.h +++ b/source/common/upstream/load_balancer_impl.h @@ -26,8 +26,30 @@ class LoadBalancerBase { Runtime::RandomGenerator& random_; private: + /** + * @return true if zone aware host selection should be skipped. + */ bool earlyExitNonZoneRouting(); - bool isGlobalPanic(); + + /** + * @return whether we should be in panic mode for the given host_set. + * For example, if the majority of hosts are unhealthy we'll likely be in panic mode. + * In this case we'll route requests to hosts no matter if they are healthy or not. + */ + bool isGlobalPanic(const HostSet& host_set); + + /** + * Try to select upstream hosts from the same zone, otherwise from a zone with spare capacity. + */ + const std::vector& tryChooseLocalZoneHosts(); + + /** + * @return (number of hosts in a given zone)/(total number of hosts) in ret param. + * The result is stored as integer number and scaled by 10000 multiplier for better precision. + * Caller owns ret, which must hold one entry per zone; it is left unmodified if there are no hosts. 
+ */ + void calculateZonePercentage(const std::vector>& hosts_per_zone, + uint64_t* ret); const HostSet& host_set_; const HostSet* local_host_set_; diff --git a/source/common/upstream/upstream_impl.h b/source/common/upstream/upstream_impl.h index 8b69af6fd89f..3272ed059966 100644 --- a/source/common/upstream/upstream_impl.h +++ b/source/common/upstream/upstream_impl.h @@ -105,7 +105,7 @@ typedef std::shared_ptr>> HostListsPtr; typedef std::shared_ptr>> ConstHostListsPtr; /** - * Base clase for all clusters as well as thread local host sets. + * Base class for all clusters as well as thread local host sets. */ class HostSetImpl : public virtual HostSet { public: diff --git a/test/common/upstream/load_balancer_impl_test.cc b/test/common/upstream/load_balancer_impl_test.cc index 36576a32e91e..3011b90c6252 100644 --- a/test/common/upstream/load_balancer_impl_test.cc +++ b/test/common/upstream/load_balancer_impl_test.cc @@ -18,30 +18,48 @@ class RoundRobinLoadBalancerTest : public testing::Test { public: RoundRobinLoadBalancerTest() : stats_(ClusterImplBase::generateStats("", stats_store_)) {} + void init(bool need_local_cluster) { + if (need_local_cluster) { + local_cluster_hosts_.reset(new HostSetImpl()); + lb_.reset(new RoundRobinLoadBalancer(cluster_, local_cluster_hosts_.get(), stats_, runtime_, + random_)); + } else { + lb_.reset(new RoundRobinLoadBalancer(cluster_, nullptr, stats_, runtime_, random_)); + } + } + NiceMock cluster_; NiceMock runtime_; NiceMock random_; Stats::IsolatedStoreImpl stats_store_; ClusterStats stats_; - RoundRobinLoadBalancer lb_{cluster_, nullptr, stats_, runtime_, random_}; + std::shared_ptr local_cluster_hosts_; + std::shared_ptr lb_; + std::vector empty_host_vector_; }; -TEST_F(RoundRobinLoadBalancerTest, NoHosts) { EXPECT_EQ(nullptr, lb_.chooseHost()); } +TEST_F(RoundRobinLoadBalancerTest, NoHosts) { + init(false); + EXPECT_EQ(nullptr, lb_->chooseHost()); +} TEST_F(RoundRobinLoadBalancerTest, SingleHost) { + init(false); 
cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}; cluster_.hosts_ = cluster_.healthy_hosts_; - EXPECT_EQ(cluster_.healthy_hosts_[0], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); } TEST_F(RoundRobinLoadBalancerTest, Normal) { + init(false); cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}; cluster_.hosts_ = cluster_.healthy_hosts_; - EXPECT_EQ(cluster_.healthy_hosts_[0], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); } TEST_F(RoundRobinLoadBalancerTest, MaxUnhealthyPanic) { + init(false); cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}; cluster_.hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), @@ -51,9 +69,9 @@ TEST_F(RoundRobinLoadBalancerTest, MaxUnhealthyPanic) { newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:84"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:85")}; - EXPECT_EQ(cluster_.hosts_[0], lb_.chooseHost()); - EXPECT_EQ(cluster_.hosts_[1], lb_.chooseHost()); - EXPECT_EQ(cluster_.hosts_[2], lb_.chooseHost()); + EXPECT_EQ(cluster_.hosts_[0], lb_->chooseHost()); + EXPECT_EQ(cluster_.hosts_[1], lb_->chooseHost()); + EXPECT_EQ(cluster_.hosts_[2], lb_->chooseHost()); // Take the threshold back above the panic threshold. 
cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), @@ -61,44 +79,95 @@ TEST_F(RoundRobinLoadBalancerTest, MaxUnhealthyPanic) { newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:83")}; - EXPECT_EQ(cluster_.healthy_hosts_[3], lb_.chooseHost()); - EXPECT_EQ(cluster_.healthy_hosts_[0], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[3], lb_->chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); - EXPECT_EQ(3UL, stats_.upstream_rq_lb_healthy_panic_.value()); + EXPECT_EQ(3UL, stats_.lb_healthy_panic_.value()); } TEST_F(RoundRobinLoadBalancerTest, ZoneAwareSmallCluster) { - cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}; - cluster_.hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}; - cluster_.healthy_hosts_per_zone_ = {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, - {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}, - {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}}; + init(true); + HostVectorPtr hosts( + new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")})); + HostListsPtr hosts_per_zone(new std::vector>( + {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}})); + + cluster_.hosts_ = *hosts; + cluster_.healthy_hosts_ = *hosts; + cluster_.healthy_hosts_per_zone_ = *hosts_per_zone; + local_cluster_hosts_->updateHosts(hosts, 
hosts, hosts_per_zone, hosts_per_zone, + empty_host_vector_, empty_host_vector_); + EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) + .WillRepeatedly(Return(50)); EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) .WillRepeatedly(Return(true)); - EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.min_cluster_size", 6)) + EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6)) .WillRepeatedly(Return(6)); - EXPECT_EQ(cluster_.healthy_hosts_[0], lb_.chooseHost()); - EXPECT_EQ(1U, stats_.zone_cluster_too_small_.value()); - EXPECT_EQ(cluster_.healthy_hosts_[1], lb_.chooseHost()); - EXPECT_EQ(2U, stats_.zone_cluster_too_small_.value()); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); + EXPECT_EQ(1U, stats_.lb_zone_cluster_too_small_.value()); + EXPECT_EQ(cluster_.healthy_hosts_[1], lb_->chooseHost()); + EXPECT_EQ(2U, stats_.lb_zone_cluster_too_small_.value()); + EXPECT_EQ(cluster_.healthy_hosts_[2], lb_->chooseHost()); + EXPECT_EQ(3U, stats_.lb_zone_cluster_too_small_.value()); + + EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6)) + .WillRepeatedly(Return(1)); + EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); + EXPECT_EQ(3U, stats_.lb_zone_cluster_too_small_.value()); } -TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZone) { - cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}; - cluster_.hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}; - cluster_.healthy_hosts_per_zone_ = {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, - 
{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}, - {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}}; +TEST_F(RoundRobinLoadBalancerTest, NoZoneAwareDifferentZoneSize) { + init(true); + HostVectorPtr hosts( + new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")})); + HostListsPtr upstream_hosts_per_zone(new std::vector>( + {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}})); + HostListsPtr local_hosts_per_zone(new std::vector>( + {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}})); + + cluster_.healthy_hosts_ = *hosts; + cluster_.hosts_ = *hosts; + cluster_.healthy_hosts_per_zone_ = *upstream_hosts_per_zone; + local_cluster_hosts_->updateHosts(hosts, hosts, local_hosts_per_zone, local_hosts_per_zone, + empty_host_vector_, empty_host_vector_); + + EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) + .WillRepeatedly(Return(50)); + EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) + .WillRepeatedly(Return(true)); + EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6)) + .WillOnce(Return(1)); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); + EXPECT_EQ(1U, stats_.lb_zone_number_differs_.value()); +} + +TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZoneSwitchOnOff) { + init(true); + HostVectorPtr hosts( + new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")})); + HostListsPtr hosts_per_zone(new std::vector>( + 
{{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}})); + + cluster_.healthy_hosts_ = *hosts; + cluster_.hosts_ = *hosts; + cluster_.healthy_hosts_per_zone_ = *hosts_per_zone; + local_cluster_hosts_->updateHosts(hosts, hosts, hosts_per_zone, hosts_per_zone, + empty_host_vector_, empty_host_vector_); EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) .WillRepeatedly(Return(50)); @@ -109,33 +178,47 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZone) { .WillRepeatedly(Return(3)); // There is only one host in the given zone for zone aware routing. - EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_.chooseHost()); - EXPECT_EQ(1U, stats_.zone_over_percentage_.value()); - EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_.chooseHost()); - EXPECT_EQ(2U, stats_.zone_over_percentage_.value()); + EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); + EXPECT_EQ(1U, stats_.lb_zone_routing_all_directly_.value()); + EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); + EXPECT_EQ(2U, stats_.lb_zone_routing_all_directly_.value()); // Disable runtime global zone routing. 
EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) .WillRepeatedly(Return(false)); - EXPECT_EQ(cluster_.healthy_hosts_[2], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[2], lb_->chooseHost()); } TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingSmallZone) { - cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:83"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:84")}; - cluster_.hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:83"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:84")}; - cluster_.healthy_hosts_per_zone_ = {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, - {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}, - {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:83"), - newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:84")}}; + init(true); + HostVectorPtr upstream_hosts( + new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:83"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:84")})); + HostVectorPtr local_hosts( + new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:0"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:1"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:2")})); + + HostListsPtr upstream_hosts_per_zone(new std::vector>( + 
{{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:83"), + newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:84")}})); + + HostListsPtr local_hosts_per_zone(new std::vector>( + {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:0")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:1")}, + {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:2")}})); + + cluster_.healthy_hosts_ = *upstream_hosts; + cluster_.hosts_ = *upstream_hosts; + cluster_.healthy_hosts_per_zone_ = *upstream_hosts_per_zone; + local_cluster_hosts_->updateHosts(local_hosts, local_hosts, local_hosts_per_zone, + local_hosts_per_zone, empty_host_vector_, empty_host_vector_); EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) .WillRepeatedly(Return(50)); @@ -146,27 +229,32 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingSmallZone) { .WillRepeatedly(Return(5)); // There is only one host in the given zone for zone aware routing. - EXPECT_CALL(random_, random()).WillOnce(Return(1000)); - EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_.chooseHost()); - EXPECT_EQ(1U, stats_.zone_routing_sampled_.value()); - EXPECT_CALL(random_, random()).WillOnce(Return(6500)); - EXPECT_EQ(cluster_.healthy_hosts_[1], lb_.chooseHost()); - EXPECT_EQ(1U, stats_.zone_routing_no_sampled_.value()); + EXPECT_CALL(random_, random()).WillOnce(Return(100)); + EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); + EXPECT_EQ(1U, stats_.lb_zone_routing_sampled_.value()); + // Force request out of small zone. 
+ EXPECT_CALL(random_, random()).WillOnce(Return(9999)).WillOnce(Return(2)); + EXPECT_EQ(cluster_.healthy_hosts_per_zone_[1][1], lb_->chooseHost()); + EXPECT_EQ(1U, stats_.lb_zone_routing_cross_zone_.value()); } TEST_F(RoundRobinLoadBalancerTest, NoZoneAwareRoutingOneZone) { - cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}; - cluster_.hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}; - cluster_.healthy_hosts_per_zone_ = {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}}; - - EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)).Times(0); - EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) - .WillRepeatedly(Return(50)); - - EXPECT_EQ(cluster_.healthy_hosts_[0], lb_.chooseHost()); + init(true); + HostVectorPtr hosts( + new std::vector({newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")})); + HostListsPtr hosts_per_zone(new std::vector>( + {{newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81")}})); + + cluster_.healthy_hosts_ = *hosts; + cluster_.hosts_ = *hosts; + cluster_.healthy_hosts_per_zone_ = *hosts_per_zone; + local_cluster_hosts_->updateHosts(hosts, hosts, hosts_per_zone, hosts_per_zone, + empty_host_vector_, empty_host_vector_); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); } TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingNotHealthy) { + init(true); cluster_.healthy_hosts_ = {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:81"), newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}; @@ -181,8 +269,8 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingNotHealthy) { .WillRepeatedly(Return(50)); // local zone has no healthy hosts, take from the all healthy hosts. 
- EXPECT_EQ(cluster_.healthy_hosts_[0], lb_.chooseHost()); - EXPECT_EQ(cluster_.healthy_hosts_[1], lb_.chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); + EXPECT_EQ(cluster_.healthy_hosts_[1], lb_->chooseHost()); } class LeastRequestLoadBalancerTest : public testing::Test { diff --git a/test/common/upstream/load_balancer_simulation_test.cc b/test/common/upstream/load_balancer_simulation_test.cc index bb04b638cc92..f206a2a2e61b 100644 --- a/test/common/upstream/load_balancer_simulation_test.cc +++ b/test/common/upstream/load_balancer_simulation_test.cc @@ -39,38 +39,59 @@ class DISABLED_SimulationTest : public testing::Test { */ void run(std::vector originating_cluster, std::vector all_destination_cluster, std::vector healthy_destination_cluster) { + local_host_set_ = new HostSetImpl(); + // TODO: make load balancer per originating cluster host. + RandomLoadBalancer lb(cluster_, local_host_set_, stats_, runtime_, random_); - std::vector> per_zone_hosts = - generateHostsPerZone(healthy_destination_cluster); + HostListsPtr upstream_per_zone_hosts = generateHostsPerZone(healthy_destination_cluster); + HostListsPtr local_per_zone_hosts = generateHostsPerZone(originating_cluster); - std::vector originating_hosts = generateHostList(originating_cluster); - cluster_.healthy_hosts_ = generateHostList(healthy_destination_cluster); - cluster_.hosts_ = generateHostList(all_destination_cluster); + HostVectorPtr originating_hosts = generateHostList(originating_cluster); + HostVectorPtr healthy_destination = generateHostList(healthy_destination_cluster); + cluster_.healthy_hosts_ = *healthy_destination; + HostVectorPtr all_destination = generateHostList(all_destination_cluster); + cluster_.hosts_ = *all_destination; std::map hits; for (uint32_t i = 0; i < total_number_of_requests; ++i) { - HostPtr from_host = selectOriginatingHost(originating_hosts); + HostPtr from_host = selectOriginatingHost(*originating_hosts); uint32_t from_zone = 
atoi(from_host->zone().c_str()); - std::vector> per_zone_upstream; - per_zone_upstream.push_back(per_zone_hosts[from_zone]); - for (size_t pos = 0; pos < per_zone_hosts.size(); ++pos) { - if (pos == from_zone) { + // Populate host set for upstream cluster. + HostListsPtr per_zone_upstream(new std::vector>()); + per_zone_upstream->push_back((*upstream_per_zone_hosts)[from_zone]); + for (size_t zone = 0; zone < upstream_per_zone_hosts->size(); ++zone) { + if (zone == from_zone) { continue; } - per_zone_upstream.push_back(per_zone_hosts[pos]); + per_zone_upstream->push_back((*upstream_per_zone_hosts)[zone]); } + cluster_.hosts_per_zone_ = *per_zone_upstream; + cluster_.healthy_hosts_per_zone_ = *per_zone_upstream; + + // Populate host set for originating cluster. + HostListsPtr per_zone_local(new std::vector>()); + per_zone_local->push_back((*local_per_zone_hosts)[from_zone]); + for (size_t zone = 0; zone < local_per_zone_hosts->size(); ++zone) { + if (zone == from_zone) { + continue; + } - cluster_.healthy_hosts_per_zone_ = std::move(per_zone_upstream); + per_zone_local->push_back((*local_per_zone_hosts)[zone]); + } + local_host_set_->updateHosts(originating_hosts, originating_hosts, per_zone_local, + per_zone_local, empty_vector_, empty_vector_); - ConstHostPtr selected = lb_.chooseHost(); + ConstHostPtr selected = lb.chooseHost(); hits[selected->url()]++; } + double mean = total_number_of_requests * 1.0 / hits.size(); for (const auto& host_hit_num_pair : hits) { - std::cout << fmt::format("url:{}, hits:{}", host_hit_num_pair.first, host_hit_num_pair.second) - << std::endl; + double percent_diff = std::abs((mean - host_hit_num_pair.second) / mean) * 100; + std::cout << fmt::format("url:{}, hits:{}, {} % from mean", host_hit_num_pair.first, + host_hit_num_pair.second, percent_diff) << std::endl; } } @@ -83,13 +104,13 @@ class DISABLED_SimulationTest : public testing::Test { * Generate list of hosts based on number of hosts in the given zone. 
* @param hosts number of hosts per zone. */ - std::vector generateHostList(const std::vector& hosts) { - std::vector ret; + HostVectorPtr generateHostList(const std::vector& hosts) { + HostVectorPtr ret(new std::vector()); for (size_t i = 0; i < hosts.size(); ++i) { const std::string zone = std::to_string(i); for (uint32_t j = 0; j < hosts[i]; ++j) { const std::string url = fmt::format("tcp://host.{}.{}:80", i, j); - ret.push_back(newTestHost(cluster_, url, 1, zone)); + ret->push_back(newTestHost(cluster_, url, 1, zone)); } } @@ -100,8 +121,8 @@ class DISABLED_SimulationTest : public testing::Test { * Generate hosts by zone. * @param hosts number of hosts per zone. */ - std::vector> generateHostsPerZone(const std::vector& hosts) { - std::vector> ret; + HostListsPtr generateHostsPerZone(const std::vector& hosts) { + HostListsPtr ret(new std::vector>()); for (size_t i = 0; i < hosts.size(); ++i) { const std::string zone = std::to_string(i); std::vector zone_hosts; @@ -111,21 +132,21 @@ class DISABLED_SimulationTest : public testing::Test { zone_hosts.push_back(newTestHost(cluster_, url, 1, zone)); } - ret.push_back(std::move(zone_hosts)); + ret->push_back(std::move(zone_hosts)); } return ret; }; - const uint32_t total_number_of_requests = 3000000; + const uint32_t total_number_of_requests = 1000000; + std::vector empty_vector_; + HostSetImpl* local_host_set_; NiceMock cluster_; NiceMock runtime_; Runtime::RandomGeneratorImpl random_; Stats::IsolatedStoreImpl stats_store_; ClusterStats stats_; - // TODO: make per originating host load balancer. 
- RandomLoadBalancer lb_{cluster_, nullptr, stats_, runtime_, random_}; }; TEST_F(DISABLED_SimulationTest, strictlyEqualDistribution) { @@ -145,7 +166,15 @@ TEST_F(DISABLED_SimulationTest, unequalZoneDistribution3) { } TEST_F(DISABLED_SimulationTest, unequalZoneDistribution4) { - run({20U, 20U, 21U}, {4U, 4U, 5U}, {4U, 5U, 5U}); + run({20U, 20U, 21U}, {4U, 5U, 5U}, {4U, 5U, 5U}); +} + +TEST_F(DISABLED_SimulationTest, unequalZoneDistribution5) { + run({3U, 2U, 5U}, {4U, 5U, 5U}, {4U, 5U, 5U}); +} + +TEST_F(DISABLED_SimulationTest, unequalZoneDistribution6) { + run({3U, 2U, 5U}, {3U, 4U, 5U}, {3U, 4U, 5U}); } } // Upstream \ No newline at end of file