http: prefetch for upstreams #14143

Merged: 21 commits, Jan 13, 2021
Changes from 2 commits
api/envoy/config/cluster/v3/cluster.proto (1 addition, 1 deletion)
@@ -599,7 +599,7 @@ message Cluster {
// for example proxying HTTP/1.1 if keep-alive were false and each stream resulted in connection
// termination. It would likely be overkill for long lived connections, such as TCP proxying SMTP
// or regular HTTP/1.1 with keep-alive. For long lived traffic, a value of 1.05 would be more
- // reasonable, where for every 100 connections, 5 pre-established connections would be in the queue
+ // reasonable, where for every 100 connections, 5 preconnected connections would be in the queue
// in case of unexpected disconnects where the connection could not be reused.
//
// If this value is not set, or set explicitly to one, Envoy will fetch as many connections
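
To make the ratio arithmetic in the comment above concrete, here is a minimal standalone sketch; the function name and the rounding choice are illustrative assumptions, not Envoy's internal API.

#include <cmath>
#include <cstdint>
#include <iostream>

// Illustrative only: total connection capacity implied by a preconnect ratio,
// assuming one stream per connection (e.g. HTTP/1.1).
uint32_t desiredCapacity(uint32_t current_streams, float preconnect_ratio) {
  // Round up so fractional headroom still yields a whole connection.
  return static_cast<uint32_t>(std::ceil(current_streams * preconnect_ratio));
}

int main() {
  // A ratio of 1.05 with 100 streams gives a desired capacity of 105, i.e.
  // roughly 5 preconnected connections held in reserve for unexpected
  // disconnects, matching the example in the proto comment.
  std::cout << desiredCapacity(100, 1.05f) << "\n"; // prints 105
}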
api/envoy/config/cluster/v4alpha/cluster.proto (1 addition, 1 deletion)

Some generated files are not rendered by default.

generated_api_shadow/envoy/config/cluster/v3/cluster.proto (1 addition, 1 deletion)

Some generated files are not rendered by default.


include/envoy/common/conn_pool.h (3 additions, 3 deletions)
@@ -70,10 +70,10 @@ class Instance {
virtual Upstream::HostDescriptionConstSharedPtr host() const PURE;

/**
- * Establishes an additional upstream connection, if existing connections do not meet both current
- * and anticipated load.
+ * Creates an upstream connection, if existing connections do not meet both current and
+ * anticipated load.
*
- * @return true if a connection was established, false otherwise.
+ * @return true if a connection was preconnected, false otherwise.
*/
virtual bool maybePreconnect(float preconnect_ratio) PURE;
};
source/common/upstream/cluster_manager_impl.cc (9 additions, 14 deletions)
@@ -858,20 +858,15 @@ void ClusterManagerImpl::maybePreconnect(
for (int i = 0; i < 3; ++i) {
// Just as in ConnPoolImplBase::shouldCreateNewConnection, see if adding this one new connection
// would put the cluster over desired capacity. If so, stop preconnecting.
- if ((state.pending_streams_ + 1 + state.active_streams_) * peekahead_ratio <=
-     (state.connecting_stream_capacity_ + state.active_streams_)) {
+ if ((state.connecting_stream_capacity_ + state.active_streams_) >
+     (state.pending_streams_ + 1 + state.active_streams_) * peekahead_ratio) {
Member:

I'm feeling very dense, but I'm having a really hard time with this logic. Two things:

  1. We are mixing streams and connections. When you add one, do we need to be taking into account the number of streams per connection? It seems like there is math missing here but maybe I'm not understanding it (likely). (I have additional confusion about why we are adding +1 on this side of the equation vs. the other side, but maybe that will be more clear once we talk about connections vs. streams).
  2. Is it possible to lift this logic out into a small helper shared with the same logic in shouldCreateNewConnection and add a bunch more comments? I think that would help me. (Same question about +1 in that function)

return;
}
ConnectionPool::Instance* preconnect_pool = pick_preconnect_pool();
- if (preconnect_pool) {
- if (!preconnect_pool->maybePreconnect(peekahead_ratio)) {
- // Given that the next preconnect pick may be entirely different, we could
- // opt to try again even if the first preconnect fails. Err on the side of
- // caution and wait for the next attempt.
- return;
- }
- } else {
- // If unable to find a preconnect pool, exit early.
+ if (!preconnect_pool || !preconnect_pool->maybePreconnect(peekahead_ratio)) {
+ // Given that the next preconnect pick may be entirely different, we could
+ // opt to try again even if the first preconnect fails. Err on the side of
+ // caution and wait for the next attempt.
return;
}
}
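
The review comment above asks whether this check could be lifted into a helper shared with ConnPoolImplBase::shouldCreateNewConnection. The sketch below is only a guess at what such a helper might look like; the name and signature are made up, and it simply restates the inequality used in the new code, where the +1 accounts for the stream about to be created.

#include <cstdint>
#include <iostream>

// Hypothetical shared helper, not part of this PR. Returns true when existing
// plus in-flight capacity already covers current and anticipated load, i.e.
// when no further connection needs to be created or preconnected.
bool hasSufficientCapacity(uint32_t pending_streams, uint32_t active_streams,
                           uint64_t connecting_stream_capacity,
                           float preconnect_ratio) {
  // Anticipated demand: pending and active streams, plus the one stream about
  // to be created, scaled by the preconnect (peekahead) ratio.
  const float anticipated_demand =
      (pending_streams + 1 + active_streams) * preconnect_ratio;
  // Current capacity: streams the connecting connections will be able to
  // serve, plus the streams already being served.
  const float current_capacity =
      static_cast<float>(connecting_stream_capacity + active_streams);
  return current_capacity > anticipated_demand;
}

int main() {
  // 10 active streams, none pending, 12 streams of connecting capacity,
  // ratio 1.0: capacity (22) > demand (11), so preconnecting would stop here.
  std::cout << hasSufficientCapacity(0, 10, 12, 1.0f) << "\n"; // prints 1
}

With a helper like this, the loop above would stop preconnecting as soon as it returns true.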
@@ -885,7 +880,7 @@ ClusterManagerImpl::ThreadLocalClusterManagerImpl::ClusterEntry::httpConnPool(
auto ret = connPool(priority, protocol, context, false);

// Now see if another host should be preconnected.
- // httpConnPoolForCluster is called immediately before a call for newStream. newStream doesn't
+ // httpConnPool is called immediately before a call for newStream. newStream doesn't
// have the load balancer context needed to make selection decisions so preconnecting must be
// performed here in anticipation of the new stream.
// TODO(alyssawilk) refactor to have one function call and return a pair, so this invariant is
@@ -902,7 +897,7 @@ ClusterManagerImpl::ThreadLocalClusterManagerImpl::ClusterEntry::tcpConnPool(
// Select a host and create a connection pool for it if it does not already exist.
auto ret = tcpConnPool(priority, context, false);

- // tcpConnPoolForCluster is called immediately before a call for newConnection. newConnection
+ // tcpConnPool is called immediately before a call for newConnection. newConnection
// doesn't have the load balancer context needed to make selection decisions so preconnecting must
// be performed here in anticipation of the new connection.
// TODO(alyssawilk) refactor to have one function call and return a pair, so this invariant is
@@ -1253,7 +1248,7 @@ void ClusterManagerImpl::ThreadLocalClusterManagerImpl::onHostHealthFailure(

if (host->cluster().features() &
ClusterInfo::Features::CLOSE_CONNECTIONS_ON_HOST_HEALTH_FAILURE) {
- // Close non connection pool TCP connections obtained from tcpConnForCluster()
+ // Close non connection pool TCP connections obtained from tcpConn()
//
// TODO(jono): The only remaining user of the non-pooled connections seems to be the statsd
// TCP client. Perhaps it could be rewritten to use a connection pool, and this code deleted.
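The comments in the httpConnPool and tcpConnPool hunks above describe an ordering invariant: the load-balancer context is only available when the pool is picked, not when newStream/newConnection is called, so preconnecting has to happen during the pick. A standalone toy sketch of that ordering, with every name and type a simplified stand-in rather than Envoy's real API:

#include <iostream>

// Simplified stand-ins; in Envoy these would be the LB context, the connection
// pool, and the thread-local ClusterEntry.
struct LoadBalancerContext {};

struct ConnPool {
  void newConnection() { std::cout << "new connection on the already-picked host\n"; }
};

struct ClusterEntry {
  ConnPool* tcpConnPool(LoadBalancerContext* context) {
    // The context is only available here, so this is the last point at which a
    // different host can be picked and preconnected in anticipation of the
    // connection the caller is about to create.
    maybePreconnectAnotherHost(context);
    return &pool_;
  }
  void maybePreconnectAnotherHost(LoadBalancerContext*) {
    std::cout << "preconnect considered while the LB context is still available\n";
  }
  ConnPool pool_;
};

int main() {
  ClusterEntry cluster;
  LoadBalancerContext ctx;
  ConnPool* pool = cluster.tcpConnPool(&ctx); // pool pick + possible preconnect
  pool->newConnection();                      // no LB context available anymore
}
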
source/common/upstream/load_balancer_impl.cc (8 additions, 2 deletions)
@@ -22,6 +22,12 @@ static const std::string RuntimeZoneEnabled = "upstream.zone_routing.enabled";
static const std::string RuntimeMinClusterSize = "upstream.zone_routing.min_cluster_size";
static const std::string RuntimePanicThreshold = "upstream.healthy_panic_threshold";

+ bool tooManyPreconnects(size_t num_preconnect_picks, uint32_t healthy_hosts) {
+ // Currently we only allow the number of preconnected connections to equal the
+ // number of healthy hosts.
+ return num_preconnect_picks >= healthy_hosts;
+ }

// Distributes load between priorities based on the per priority availability and the normalized
// total availability. Load is assigned to each priority according to how available each priority is
// adjusted for the normalized total availability.
@@ -780,7 +786,7 @@ void EdfLoadBalancerBase::refresh(uint32_t priority) {
}

HostConstSharedPtr EdfLoadBalancerBase::peekAnotherHost(LoadBalancerContext* context) {
- if (stashed_random_.size() >= total_healthy_hosts_) {
+ if (tooManyPreconnects(stashed_random_.size(), total_healthy_hosts_)) {
return nullptr;
}
Contributor:

Is 1 the right max ratio of prefetched connections to healthy hosts? I imagine that when the number of endpoints is small it would be beneficial to set this ratio > 1.0, especially if host weights are not all equal.

Contributor Author:

This one isn't a ratio thing - we currently cap #prefetches to the number of healthy hosts.

Contributor:

Are there plans to relax that restriction? It seems to get in the way of getting to a fixed number of connections in cases where the number of healthy upstreams is less than the desired number of connections.

Contributor Author:

As needed. Right now I was aiming at the server side, where for low QPS you'd generally prefetch a few, and for high QPS you'd want one per upstream, but as we move towards mobile, if we only have a single upstream per endpoint we'd want to have more than one prefetch.
I don't want it unlimited because it's wasteful, and the cap acts as a useful upper bound.
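
To make the cap under discussion concrete, here is a minimal standalone sketch using the tooManyPreconnects helper added in this diff; the driver loop and the host count are illustrative only.

#include <cstddef>
#include <cstdint>
#include <iostream>

// Same check as the helper added in load_balancer_impl.cc, reproduced here
// only to illustrate the cap discussed in the thread above.
bool tooManyPreconnects(size_t num_preconnect_picks, uint32_t healthy_hosts) {
  return num_preconnect_picks >= healthy_hosts;
}

int main() {
  // With 3 healthy hosts, a peekAnotherHost-style caller gets a host for the
  // first 3 stashed preconnect picks and nullptr afterwards, so preconnected
  // connections never exceed the number of healthy hosts.
  const uint32_t healthy_hosts = 3;
  for (size_t stashed_picks = 0; stashed_picks <= 4; ++stashed_picks) {
    std::cout << "stashed=" << stashed_picks << " blocked="
              << (tooManyPreconnects(stashed_picks, healthy_hosts) ? "yes" : "no")
              << "\n";
  }
}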


@@ -869,7 +875,7 @@ HostConstSharedPtr LeastRequestLoadBalancer::unweightedHostPick(const HostVector
}

HostConstSharedPtr RandomLoadBalancer::peekAnotherHost(LoadBalancerContext* context) {
- if (stashed_random_.size() >= total_healthy_hosts_) {
+ if (tooManyPreconnects(stashed_random_.size(), total_healthy_hosts_)) {
return nullptr;
}
return peekOrChoose(context, true);
test/common/upstream/load_balancer_fuzz.proto (1 addition, 1 deletion)
@@ -27,7 +27,7 @@ message LbAction {
// This updates the health flags of hosts at a certain priority level. The number of hosts in each priority level/in localities is static,
// as untrusted upstreams cannot change that, and can only change their health flags.
UpdateHealthFlags update_health_flags = 1;
- // preconnects a host using the encapsulated specific load balancer.
+ // Preconnects a host using the encapsulated specific load balancer.
google.protobuf.Empty preconnect = 2;
// Chooses a host using the encapsulated specific load balancer.
google.protobuf.Empty choose_host = 3;