Skip to content

Commit

Permalink
overload: scale selected timers in response to load (#13475)
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Konradi <akonradi@google.com>
  • Loading branch information
akonradi authored Oct 28, 2020
1 parent f6991d9 commit a68755d
Show file tree
Hide file tree
Showing 26 changed files with 462 additions and 16 deletions.
1 change: 1 addition & 0 deletions api/envoy/config/overload/v3/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ licenses(["notice"]) # Apache 2
api_proto_package(
deps = [
"//envoy/config/overload/v2alpha:pkg",
"//envoy/type/v3:pkg",
"@com_github_cncf_udpa//udpa/annotations:pkg",
],
)
37 changes: 37 additions & 0 deletions api/envoy/config/overload/v3/overload.proto
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ syntax = "proto3";

package envoy.config.overload.v3;

import "envoy/type/v3/percent.proto";

import "google/protobuf/any.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/struct.proto";
Expand Down Expand Up @@ -80,6 +82,38 @@ message Trigger {
}
}

// Typed configuration for the "envoy.overload_actions.reduce_timeouts" action. See
// :ref:`the docs <config_overload_manager_reducing_timeouts>` for an example of how to configure
// the action with different timeouts and minimum values.
message ScaleTimersOverloadActionConfig {
// Identifies the timer that a ScaleTimer rule applies to.
enum TimerType {
// Unsupported value; users must explicitly specify the timer they want scaled.
// The validation rule on ScaleTimer.timer rejects this value.
UNSPECIFIED = 0;

// NOTE(review): the timer scaled in conn_manager_impl.cc is armed with the connection manager's
// config_.idleTimeout() rather than a per-route value, so the RouteAction.idle_timeout reference
// below may point at the wrong field -- confirm against the HTTP connection manager config.

// Adjusts the idle timer for downstream HTTP connections that takes effect when there are no active streams.
// This affects the value of :ref:`RouteAction.idle_timeout <envoy_v3_api_field_config.route.v3.RouteAction.idle_timeout>`.
HTTP_DOWNSTREAM_CONNECTION_IDLE = 1;
}

// A single scaling rule: the timer to adjust together with the minimum value for that timer.
message ScaleTimer {
// The type of timer this minimum applies to.
// Must be a defined TimerType value other than UNSPECIFIED.
TimerType timer = 1 [(validate.rules).enum = {defined_only: true not_in: 0}];

// How the minimum is expressed. Exactly one of the members must be set.
oneof overload_adjust {
option (validate.required) = true;

// Sets the minimum duration as an absolute value.
google.protobuf.Duration min_timeout = 2;

// Sets the minimum duration as a percentage of the maximum value.
// The value is a percentage, so 10 means 10% of the maximum.
type.v3.Percent min_scale = 3;
}
}

// A set of timer scaling rules to be applied.
// At least one rule is required.
repeated ScaleTimer timer_scale_factors = 1 [(validate.rules).repeated = {min_items: 1}];
}

message OverloadAction {
option (udpa.annotations.versioning).previous_message_type =
"envoy.config.overload.v2alpha.OverloadAction";
Expand All @@ -93,6 +127,9 @@ message OverloadAction {
// state of all triggers, which can be scaling between 0 and 1 or saturated. Listeners
// are notified when the overload action changes state.
repeated Trigger triggers = 2 [(validate.rules).repeated = {min_items: 1}];

// Configuration for the action being instantiated.
google.protobuf.Any typed_config = 3;
}

message OverloadManager {
Expand Down
4 changes: 4 additions & 0 deletions api/envoy/config/route/v3/route_components.proto
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,10 @@ message RouteAction {
// fires, the stream is terminated with a 408 Request Timeout error code if no
// upstream response header has been received, otherwise a stream reset
// occurs.
//
// If the :ref:`overload action <config_overload_manager_overload_actions>` "envoy.overload_actions.reduce_timeouts"
// is configured, this timeout is scaled according to the value for
// :ref:`HTTP_DOWNSTREAM_CONNECTION_IDLE <envoy_api_enum_value_config.overload.v3.ScaleTimersOverloadActionConfig.TimerType.HTTP_DOWNSTREAM_CONNECTION_IDLE>`.
google.protobuf.Duration idle_timeout = 24;

// Indicates that the route has a retry policy. Note that if this is set,
Expand Down
4 changes: 4 additions & 0 deletions api/envoy/config/route/v4alpha/route_components.proto

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ The overload manager uses Envoy's :ref:`extension <extending>` framework for def
resource monitors. Envoy's builtin resource monitors are listed
:ref:`here <config_resource_monitors>`.

.. _config_overload_manager_triggers:

Triggers
--------

Expand Down Expand Up @@ -90,6 +92,53 @@ The following overload actions are supported:
* - envoy.overload_actions.shrink_heap
- Envoy will periodically try to shrink the heap by releasing free memory to the system

* - envoy.overload_actions.reduce_timeouts
- Envoy will reduce the waiting period for a configured set of timeouts. See
:ref:`below <config_overload_manager_reducing_timeouts>` for details on configuration.

.. _config_overload_manager_reducing_timeouts:

Reducing timeouts
^^^^^^^^^^^^^^^^^

The `envoy.overload_actions.reduce_timeouts` overload action will reduce the amount of time Envoy
will spend waiting for some interactions to finish in response to resource pressure. The amount of
reduction can be configured per timeout type by specifying the minimum timer value to use when the
triggering resource monitor detects saturation. The minimum value for each timeout can be specified
either by providing a scale factor to apply to the configured maximum, or as a concrete duration
value.

As an example, here is a single overload action entry that enables timeout reduction:

.. code-block:: yaml

  name: "envoy.overload_actions.reduce_timeouts"
  triggers:
    - name: "envoy.resource_monitors.fixed_heap"
      scaled:
        scaling_threshold: 0.85
        saturation_threshold: 0.95
  typed_config:
    "@type": type.googleapis.com/envoy.config.overload.v3.ScaleTimersOverloadActionConfig
    timer_scale_factors:
      - timer: HTTP_DOWNSTREAM_CONNECTION_IDLE
        min_timeout: 2s

It configures the overload manager to change, in response to heap size, the amount of time that
HTTP connections are allowed to remain idle before being closed. When the heap usage is less than
85%, idle connections will time out at their usual time, which is configured through
:ref:`HttpConnectionManager.common_http_protocol_options.idle_timeout <envoy_v3_api_field_config.core.v3.HttpProtocolOptions.idle_timeout>`.
When the heap usage is at or above 95%, idle connections will be closed after the specified
`min_timeout`, here 2 seconds. If the heap usage is between 85% and 95%, the idle connection timeout
will vary between those two based on the formula for the :ref:`scaled trigger <config_overload_manager_triggers>`.
So if the configured `idle_timeout` is 600 seconds and heap usage is at 92%, idle connections will
time out after :math:`2s + (600s - 2s) \cdot (95\% - 92\%) / (95\% - 85\%) = 181.4s`.

Note in the example that the minimum idle time is specified as an absolute duration. If, instead,
`min_timeout: 2s` were to be replaced with `min_scale: { value: 10 }`, the minimum timer value
would be computed based on the maximum (specified elsewhere). So if the configured `idle_timeout`
is again 600 seconds, then the minimum timer value would be :math:`10\% \cdot 600s = 60s`.

Limiting Active Connections
---------------------------

Expand Down
1 change: 1 addition & 0 deletions docs/root/version_history/current.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ New Features
* lua: added `downstreamDirectRemoteAddress()` and `downstreamLocalAddress()` APIs to :ref:`streamInfo() <config_http_filters_lua_stream_info_wrapper>`.
* mongo_proxy: the list of commands to produce metrics for is now :ref:`configurable <envoy_v3_api_field_extensions.filters.network.mongo_proxy.v3.MongoProxy.commands>`.
* network: added a :ref:`timeout <envoy_v3_api_field_config.listener.v3.FilterChain.transport_socket_connect_timeout>` for incoming connections completing transport-level negotiation, including TLS and ALTS handshakes.
* overload: added :ref:`envoy.overload_actions.reduce_timeouts <config_overload_manager_overload_actions>` overload action to enable scaling timeouts down with load.
* ratelimit: added support for use of various :ref:`metadata <envoy_v3_api_field_config.route.v3.RateLimit.Action.metadata>` as a ratelimit action.
* ratelimit: added :ref:`disable_x_envoy_ratelimited_header <envoy_v3_api_msg_extensions.filters.http.ratelimit.v3.RateLimit>` option to disable `X-Envoy-RateLimited` header.
* tcp: added a new :ref:`envoy.overload_actions.reject_incoming_connections <config_overload_manager_overload_actions>` action to reject incoming TCP connections.
Expand Down
1 change: 1 addition & 0 deletions generated_api_shadow/envoy/config/overload/v3/BUILD

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 37 additions & 0 deletions generated_api_shadow/envoy/config/overload/v3/overload.proto

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions include/envoy/server/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ envoy_cc_library(
name = "overload_manager_interface",
hdrs = ["overload_manager.h"],
deps = [
"//include/envoy/event:timer_interface",
"//include/envoy/thread_local:thread_local_interface",
"//source/common/singleton:const_singleton",
],
Expand Down
16 changes: 16 additions & 0 deletions include/envoy/server/overload_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <string>

#include "envoy/common/pure.h"
#include "envoy/event/timer.h"
#include "envoy/thread_local/thread_local.h"

#include "common/common/macros.h"
Expand Down Expand Up @@ -39,13 +40,25 @@ class OverloadActionState {
*/
using OverloadActionCb = std::function<void(OverloadActionState)>;

/**
 * Identifies the kind of timer requested from ThreadLocalOverloadState::createScaledTimer().
 */
enum class OverloadTimerType {
// Timers created with this type will never be scaled. This should only be used for testing.
UnscaledRealTimerForTest,
// The amount of time an HTTP connection to a downstream client can remain idle (no streams). This
// corresponds to the HTTP_DOWNSTREAM_CONNECTION_IDLE TimerType in overload.proto.
HttpDownstreamIdleConnectionTimeout,
};

/**
* Thread-local copy of the state of each configured overload action.
*/
class ThreadLocalOverloadState : public ThreadLocal::ThreadLocalObject {
public:
// Get a thread-local reference to the value for the given action key.
// @param action the overload action name used as the lookup key.
// @return a const reference to the state for that action.
virtual const OverloadActionState& getState(const std::string& action) PURE;

// Get a scaled timer whose minimum corresponds to the configured value for the given timer type.
// @param timer_type which kind of timer is being created; see OverloadTimerType.
// @param callback the callback handed to the created timer.
// @return the newly created timer.
virtual Event::TimerPtr createScaledTimer(OverloadTimerType timer_type,
Event::TimerCb callback) PURE;
};

/**
Expand All @@ -68,6 +81,9 @@ class OverloadActionNameValues {

// Overload action to try to shrink the heap by releasing free memory.
const std::string ShrinkHeap = "envoy.overload_actions.shrink_heap";

// Overload action to reduce some subset of configured timeouts.
const std::string ReduceTimeouts = "envoy.overload_actions.reduce_timeouts";
};

using OverloadActionNames = ConstSingleton<OverloadActionNameValues>;
Expand Down
12 changes: 7 additions & 5 deletions source/common/http/conn_manager_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,11 @@ ConnectionManagerImpl::ConnectionManagerImpl(ConnectionManagerConfig& config,
random_generator_(random_generator), http_context_(http_context), runtime_(runtime),
local_info_(local_info), cluster_manager_(cluster_manager),
listener_stats_(config_.listenerStats()),
overload_stop_accepting_requests_ref_(overload_manager.getThreadLocalOverloadState().getState(
Server::OverloadActionNames::get().StopAcceptingRequests)),
overload_disable_keepalive_ref_(overload_manager.getThreadLocalOverloadState().getState(
Server::OverloadActionNames::get().DisableHttpKeepAlive)),
overload_state_(overload_manager.getThreadLocalOverloadState()),
overload_stop_accepting_requests_ref_(
overload_state_.getState(Server::OverloadActionNames::get().StopAcceptingRequests)),
overload_disable_keepalive_ref_(
overload_state_.getState(Server::OverloadActionNames::get().DisableHttpKeepAlive)),
time_source_(time_source) {}

const ResponseHeaderMap& ConnectionManagerImpl::continueHeader() {
Expand All @@ -120,7 +121,8 @@ void ConnectionManagerImpl::initializeReadFilterCallbacks(Network::ReadFilterCal
read_callbacks_->connection().addConnectionCallbacks(*this);

if (config_.idleTimeout()) {
connection_idle_timer_ = read_callbacks_->connection().dispatcher().createTimer(
connection_idle_timer_ = overload_state_.createScaledTimer(
Server::OverloadTimerType::HttpDownstreamIdleConnectionTimeout,
[this]() -> void { onIdleTimeout(); });
connection_idle_timer_->enableTimer(config_.idleTimeout().value());
}
Expand Down
1 change: 1 addition & 0 deletions source/common/http/conn_manager_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ class ConnectionManagerImpl : Logger::Loggable<Logger::Id::http>,
Upstream::ClusterManager& cluster_manager_;
Network::ReadFilterCallbacks* read_callbacks_{};
ConnectionManagerListenerStats& listener_stats_;
Server::ThreadLocalOverloadState& overload_state_;
// References into the overload manager thread local state map. Using these lets us avoid a
// map lookup in the hot path of processing each request.
const Server::OverloadActionState& overload_stop_accepting_requests_ref_;
Expand Down
1 change: 1 addition & 0 deletions source/server/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ envoy_cc_library(
"//include/envoy/thread_local:thread_local_interface",
"//source/common/common:logger_lib",
"//source/common/config:utility_lib",
"//source/common/event:scaled_range_timer_manager_lib",
"//source/common/stats:symbol_table_lib",
"//source/server:resource_monitor_config_lib",
"@envoy_api//envoy/config/overload/v3:pkg_cc_proto",
Expand Down
9 changes: 7 additions & 2 deletions source/server/admin/admin.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,17 +252,22 @@ class AdminImpl : public Admin,
*/
struct NullOverloadManager : public OverloadManager {
/**
 * Thread-local overload state used by NullOverloadManager. Every action is reported as inactive
 * and timers are never scaled.
 */
struct NullThreadLocalOverloadState : public ThreadLocalOverloadState {
  // Marked explicit so that an Event::Dispatcher& is never implicitly converted into this type.
  explicit NullThreadLocalOverloadState(Event::Dispatcher& dispatcher) : dispatcher_(dispatcher) {}

  // Returns the same inactive state regardless of the action name.
  const OverloadActionState& getState(const std::string&) override { return inactive_; }

  // Ignores the timer type and returns an ordinary timer created by the dispatcher.
  Event::TimerPtr createScaledTimer(OverloadTimerType, Event::TimerCb callback) override {
    return dispatcher_.createTimer(callback);
  }

  // Dispatcher used to create timers.
  Event::Dispatcher& dispatcher_;
  // The state returned by every getState() call.
  const OverloadActionState inactive_ = OverloadActionState::inactive();
};

NullOverloadManager(ThreadLocal::SlotAllocator& slot_allocator)
: tls_(slot_allocator.allocateSlot()) {}

void start() override {
tls_->set([](Event::Dispatcher&) -> ThreadLocal::ThreadLocalObjectSharedPtr {
return std::make_shared<NullThreadLocalOverloadState>();
tls_->set([](Event::Dispatcher& dispatcher) -> ThreadLocal::ThreadLocalObjectSharedPtr {
return std::make_shared<NullThreadLocalOverloadState>(dispatcher);
});
}

Expand Down
Loading

0 comments on commit a68755d

Please sign in to comment.