Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

server: add support for range trigger overload actions #12352

Merged
merged 18 commits into from
Aug 12, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions api/envoy/config/overload/v3/overload.proto
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,18 @@ message ThresholdTrigger {
"envoy.config.overload.v2alpha.ThresholdTrigger";

// If the resource pressure is greater than or equal to this value, the trigger
// will fire.
// will enter saturation.
double value = 1 [(validate.rules).double = {lte: 1.0 gte: 0.0}];
}

message RangeTrigger {
// If the resource pressure is greater than this value, the trigger will enter the :ref:`scaling <arch_overview_overload_manager-triggers-state>` state.
double min_value = 1 [(validate.rules).double = {lte: 1.0 gte: 0.0}];
akonradi marked this conversation as resolved.
Show resolved Hide resolved

// If the resource pressure is greater than this value, the trigger will enter saturation.
double max_value = 2 [(validate.rules).double = {lte: 1.0 gte: 0.0}];
}

message Trigger {
option (udpa.annotations.versioning).previous_message_type =
"envoy.config.overload.v2alpha.Trigger";
Expand All @@ -65,6 +73,8 @@ message Trigger {
option (validate.required) = true;

ThresholdTrigger threshold = 2;

RangeTrigger range = 3;
}
}

Expand All @@ -77,9 +87,9 @@ message OverloadAction {
// DNS to ensure uniqueness.
string name = 1 [(validate.rules).string = {min_bytes: 1}];

// A set of triggers for this action. If any of these triggers fire the overload action
// is activated. Listeners are notified when the overload action transitions from
// inactivated to activated, or vice versa.
// A set of triggers for this action. The state of the action is the maximum
// state of all triggers, which can be inactive, scaling, or saturated.
// Listeners are notified when the overload action changes state.
repeated Trigger triggers = 2 [(validate.rules).repeated = {min_items: 1}];
}

Expand Down
18 changes: 14 additions & 4 deletions generated_api_shadow/envoy/config/overload/v3/overload.proto

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 21 additions & 9 deletions include/envoy/server/overload_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,27 @@
namespace Envoy {
namespace Server {

enum class OverloadActionState {
/**
* Indicates that an overload action is active because at least one of its triggers has fired.
*/
Active,
/**
* Indicates that an overload action is inactive because none of its triggers have fired.
*/
Inactive
/**
* Tracks the state of an overload action. The state is a number between 0 and 1 that represents the
* level of saturation. The values are categorized in two groups:
* - Saturated (value = 1): indicates that an overload action is active because at least one of its
* triggers has reached saturation.
* - Scaling (0 <= value < 1): indicates that an overload action is not saturated.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name of the function on L23 seems to imply that we refer to value = 0 as inactive, is this a useful distinction to make? Should that be included in this comment?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am planning to handle this in a follow-up PR. It does seem like a useful distinction at least conceptually.

*/
class OverloadActionState {
public:
static constexpr OverloadActionState inactive() { return OverloadActionState(0); }

static constexpr OverloadActionState saturated() { return OverloadActionState(1.0); }

explicit constexpr OverloadActionState(float value)
: action_value_(value < 0 ? 0 : value < 1 ? value : 1) {}
akonradi marked this conversation as resolved.
Show resolved Hide resolved

float value() const { return action_value_; }
bool isSaturated() const { return action_value_ == 1; }

private:
float action_value_;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, because then OverloadActionState isn't assignable.

};

/**
Expand Down
5 changes: 2 additions & 3 deletions source/common/http/conn_manager_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -833,8 +833,7 @@ void ConnectionManagerImpl::ActiveStream::decodeHeaders(RequestHeaderMapPtr&& he
filter_manager_.maybeEndDecode(end_stream);

// Drop new requests when overloaded as soon as we have decoded the headers.
if (connection_manager_.overload_stop_accepting_requests_ref_ ==
Server::OverloadActionState::Active) {
if (connection_manager_.overload_stop_accepting_requests_ref_.isSaturated()) {
// In this one special case, do not create the filter chain. If there is a risk of memory
// overload it is more important to avoid unnecessary allocation than to create the filters.
state_.created_filter_chain_ = true;
Expand Down Expand Up @@ -1824,7 +1823,7 @@ void ConnectionManagerImpl::ActiveStream::encodeHeadersInternal(ResponseHeaderMa
}

if (connection_manager_.drain_state_ == DrainState::NotDraining &&
connection_manager_.overload_disable_keepalive_ref_ == Server::OverloadActionState::Active) {
connection_manager_.overload_disable_keepalive_ref_.isSaturated()) {
ENVOY_STREAM_LOG(debug, "disabling keepalive due to envoy overload", *this);
connection_manager_.drain_state_ = DrainState::Closing;
connection_manager_.stats_.named_.downstream_cx_overload_disable_keepalive_.inc();
Expand Down
7 changes: 3 additions & 4 deletions source/common/memory/heap_shrinker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@ HeapShrinker::HeapShrinker(Event::Dispatcher& dispatcher, Server::OverloadManage
Stats::Scope& stats)
: active_(false) {
const auto action_name = Server::OverloadActionNames::get().ShrinkHeap;
if (overload_manager.registerForAction(action_name, dispatcher,
[this](Server::OverloadActionState state) {
active_ = (state == Server::OverloadActionState::Active);
})) {
if (overload_manager.registerForAction(
action_name, dispatcher,
[this](Server::OverloadActionState state) { active_ = state.isSaturated(); })) {
Envoy::Stats::StatNameManagedStorage stat_name(
absl::StrCat("overload.", action_name, ".shrink_count"), stats.symbolTable());
shrink_counter_ = &stats.counterFromStatName(stat_name.statName());
Expand Down
2 changes: 1 addition & 1 deletion source/server/admin/admin.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ class AdminImpl : public Admin,
struct NullThreadLocalOverloadState : public ThreadLocalOverloadState {
const OverloadActionState& getState(const std::string&) override { return inactive_; }

const OverloadActionState inactive_ = OverloadActionState::Inactive;
const OverloadActionState inactive_ = OverloadActionState::inactive();
};

NullOverloadManager(ThreadLocal::SlotAllocator& slot_allocator)
Expand Down
115 changes: 89 additions & 26 deletions source/server/overload_manager_impl.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "server/overload_manager_impl.h"

#include "envoy/common/exception.h"
#include "envoy/config/overload/v3/overload.pb.h"
#include "envoy/stats/scope.h"

Expand All @@ -24,16 +25,51 @@ class ThresholdTriggerImpl : public OverloadAction::Trigger {
: threshold_(config.value()) {}

bool updateValue(double value) override {
const bool fired = isFired();
value_ = value;
return fired != isFired();
const OverloadActionState state = actionState();
state_.emplace(value >= threshold_ ? OverloadActionState::saturated()
: OverloadActionState::inactive());
return state.value() != actionState().value();
}

bool isFired() const override { return value_.has_value() && value_ >= threshold_; }
OverloadActionState actionState() const override {
return state_.value_or(OverloadActionState::inactive());
}

private:
const double threshold_;
absl::optional<double> value_;
absl::optional<OverloadActionState> state_;
};

class RangeTriggerImpl final : public OverloadAction::Trigger {
public:
RangeTriggerImpl(const envoy::config::overload::v3::RangeTrigger& config)
: minimum_(config.min_value()),
maximum_(config.max_value()) {
if (minimum_ >= maximum_) {
throw EnvoyException("min_value must be less than max_value");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: can you adjust this exception message to match the field names? I.e. scaling/saturated_threshold.

}
}

bool updateValue(double value) override {
const OverloadActionState old_state = actionState();
if (value <= minimum_) {
state_ = OverloadActionState::inactive();
} else if (value >= maximum_) {
state_ = OverloadActionState::saturated();
} else {
state_ = OverloadActionState((value - minimum_) / (maximum_ - minimum_));
}
return state_->value() != old_state.value();
}

OverloadActionState actionState() const override {
return state_.value_or(OverloadActionState::inactive());
akonradi marked this conversation as resolved.
Show resolved Hide resolved
}

private:
const double minimum_;
const double maximum_;
absl::optional<OverloadActionState> state_;
};

/**
Expand All @@ -44,12 +80,19 @@ class ThreadLocalOverloadStateImpl : public ThreadLocalOverloadState {
const OverloadActionState& getState(const std::string& action) override {
auto it = actions_.find(action);
if (it == actions_.end()) {
it = actions_.insert(std::make_pair(action, OverloadActionState::Inactive)).first;
it = actions_.insert(std::make_pair(action, OverloadActionState::inactive())).first;
}
return it->second;
}

void setState(const std::string& action, OverloadActionState state) { actions_[action] = state; }
void setState(const std::string& action, OverloadActionState state) {
auto it = actions_.find(action);
if (it == actions_.end()) {
actions_.emplace(action, state);
} else {
it->second = state;
}
akonradi marked this conversation as resolved.
Show resolved Hide resolved
}
akonradi marked this conversation as resolved.
Show resolved Hide resolved

private:
absl::node_hash_map<std::string, OverloadActionState> actions_;
Expand All @@ -72,15 +115,21 @@ Stats::Gauge& makeGauge(Stats::Scope& scope, absl::string_view a, absl::string_v

OverloadAction::OverloadAction(const envoy::config::overload::v3::OverloadAction& config,
Stats::Scope& stats_scope)
: active_gauge_(
makeGauge(stats_scope, config.name(), "active", Stats::Gauge::ImportMode::Accumulate)) {
: state_(OverloadActionState::inactive()),
active_gauge_(
makeGauge(stats_scope, config.name(), "active", Stats::Gauge::ImportMode::Accumulate)),
scaling_gauge_(
makeGauge(stats_scope, config.name(), "scaling", Stats::Gauge::ImportMode::Accumulate)) {
akonradi marked this conversation as resolved.
Show resolved Hide resolved
for (const auto& trigger_config : config.triggers()) {
TriggerPtr trigger;

switch (trigger_config.trigger_oneof_case()) {
case envoy::config::overload::v3::Trigger::TriggerOneofCase::kThreshold:
trigger = std::make_unique<ThresholdTriggerImpl>(trigger_config.threshold());
break;
case envoy::config::overload::v3::Trigger::TriggerOneofCase::kRange:
trigger = std::make_unique<RangeTriggerImpl>(trigger_config.range());
break;
default:
NOT_REACHED_GCOVR_EXCL_LINE;
}
Expand All @@ -92,29 +141,41 @@ OverloadAction::OverloadAction(const envoy::config::overload::v3::OverloadAction
}

active_gauge_.set(0);
scaling_gauge_.set(0);
}

bool OverloadAction::updateResourcePressure(const std::string& name, double pressure) {
const bool active = isActive();
const OverloadActionState old_state = getState();

auto it = triggers_.find(name);
ASSERT(it != triggers_.end());
if (it->second->updateValue(pressure)) {
if (it->second->isFired()) {
active_gauge_.set(1);
const auto result = fired_triggers_.insert(name);
ASSERT(result.second);
} else {
active_gauge_.set(0);
const auto result = fired_triggers_.erase(name);
ASSERT(result == 1);
if (!it->second->updateValue(pressure)) {
return false;
}
const auto trigger_new_state = it->second->actionState();
if (trigger_new_state.isSaturated()) {
active_gauge_.set(1);
scaling_gauge_.set(0);
} else {
active_gauge_.set(0);
scaling_gauge_.set(1);
}

{
akonradi marked this conversation as resolved.
Show resolved Hide resolved
OverloadActionState new_state = OverloadActionState::inactive();
for (auto& trigger : triggers_) {
const auto trigger_state = trigger.second->actionState();
if (trigger_state.value() > new_state.value()) {
new_state = trigger_state;
}
}
state_ = new_state;
}

return active != isActive();
return state_.value() != old_state.value();
}

bool OverloadAction::isActive() const { return !fired_triggers_.empty(); }
OverloadActionState OverloadAction::getState() const { return state_; }

OverloadManagerImpl::OverloadManagerImpl(Event::Dispatcher& dispatcher, Stats::Scope& stats_scope,
ThreadLocal::SlotAllocator& slot_allocator,
Expand Down Expand Up @@ -223,12 +284,14 @@ void OverloadManagerImpl::updateResourcePressure(const std::string& resource, do
const std::string& action = entry.second;
auto action_it = actions_.find(action);
ASSERT(action_it != actions_.end());
const OverloadActionState old_state = action_it->second.getState();
if (action_it->second.updateResourcePressure(resource, pressure)) {
const bool is_active = action_it->second.isActive();
const auto state =
is_active ? OverloadActionState::Active : OverloadActionState::Inactive;
ENVOY_LOG(info, "Overload action {} became {}", action,
is_active ? "active" : "inactive");
const auto state = action_it->second.getState();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is additional work needed to optimize update propagation? It seems that each action is propagated independently to workers. updateResourcePressure returns true a lot more often than before. Is there some potential for aggregation?

How often does updateResourcePressure run anyway? Should the code below wait for propagation to complete before returning as a way to ensure that at most 1 update is in progress?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, I think it should be straightforward to aggregate a bunch of <action, state> pairs in this loop and then send them to workers with a single call to runOnAllThreads. Maybe best to do this in a follow-on PR?

Re: how often this is called - our best practices doc for use as an edge proxy has an example config where we set this to 250ms (https://www.envoyproxy.io/docs/envoy/latest/configuration/best_practices/edge)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I think there are a couple optimizations we can apply here. Right now we post an event to each worker for each resource update, but we can do a better job of coalescing those. updateResourcePressure returns true more often only if the proxy is configured with a range trigger, which none should be yet.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to address this an Elisha's other comment about the 'inactive' state in a follow-on PR.


if (old_state.isSaturated() != state.isSaturated()) {
ENVOY_LOG(info, "Overload action {} became {}", action,
(state.isSaturated() ? "saturated" : "scaling"));
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is some logging appropriate for scaling state transitions?

Possibly LOG_EVERY_N_SEC when scaling, and/or LOG when entering and exiting scaling mode.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably a good idea. Does Envoy have a LOG_EVERY_N_SEC macro?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't seem like it. At least not yet.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should also be at debug level I think.

tls_->runOnAllThreads([this, action, state] {
tls_->getTyped<ThreadLocalOverloadStateImpl>().setState(action, state);
});
Expand Down
11 changes: 6 additions & 5 deletions source/server/overload_manager_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ class OverloadAction {
// has changed state.
bool updateResourcePressure(const std::string& name, double pressure);

// Returns whether the action is currently active or not.
bool isActive() const;
// Returns the current action state, which is the max state across all registered triggers.
OverloadActionState getState() const;

class Trigger {
public:
Expand All @@ -40,15 +40,16 @@ class OverloadAction {
// Updates the current value of the metric and returns whether the trigger has changed state.
virtual bool updateValue(double value) PURE;

// Returns whether the trigger is currently fired or not.
virtual bool isFired() const PURE;
// Returns the action state for the trigger.
virtual OverloadActionState actionState() const PURE;
};
using TriggerPtr = std::unique_ptr<Trigger>;

private:
absl::node_hash_map<std::string, TriggerPtr> triggers_;
absl::node_hash_set<std::string> fired_triggers_;
OverloadActionState state_;
Stats::Gauge& active_gauge_;
Stats::Gauge& scaling_gauge_;
};

class OverloadManagerImpl : Logger::Loggable<Logger::Id::main>, public OverloadManager {
Expand Down
7 changes: 2 additions & 5 deletions source/server/worker_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,10 @@ void WorkerImpl::threadRoutine(GuardDog& guard_dog) {
}

void WorkerImpl::stopAcceptingConnectionsCb(OverloadActionState state) {
switch (state) {
case OverloadActionState::Active:
if (state.isSaturated()) {
handler_->disableListeners();
break;
case OverloadActionState::Inactive:
} else {
handler_->enableListeners();
break;
}
}

Expand Down
Loading