Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support proactive checks in overload manager Api #18256

Merged
merged 11 commits into from
Dec 2, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions envoy/server/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ envoy_cc_library(
"//envoy/api:api_interface",
"//envoy/event:dispatcher_interface",
"//envoy/protobuf:message_validator_interface",
"//envoy/server/overload:proactive_resource_monitor",
],
)

Expand Down
9 changes: 9 additions & 0 deletions envoy/server/overload/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,18 @@ envoy_cc_library(
name = "thread_local_overload_state",
hdrs = ["thread_local_overload_state.h"],
deps = [
":proactive_resource_monitor",
"//envoy/event:scaled_range_timer_manager_interface",
"//envoy/event:timer_interface",
"//envoy/thread_local:thread_local_object",
"//source/common/common:interval_value",
],
)

envoy_cc_library(
name = "proactive_resource_monitor",
hdrs = ["proactive_resource_monitor.h"],
deps = [
"//envoy/stats:stats_interface",
],
)
86 changes: 86 additions & 0 deletions envoy/server/overload/proactive_resource_monitor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#pragma once

#include <string>

#include "envoy/common/pure.h"
#include "envoy/stats/scope.h"
#include "envoy/stats/stats.h"

#include "source/common/common/assert.h"
#include "source/common/stats/symbol_table_impl.h"

namespace Envoy {
namespace Server {

class ProactiveResourceMonitor {
public:
ProactiveResourceMonitor() = default;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like we're using both uint64_t and int64_t in this interface, perhaps we can simplify on one to avoid lots of cast. Thoughts?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

switched all to int64_t

virtual ~ProactiveResourceMonitor() = default;
/**
* Tries to allocate resource for given resource monitor in thread safe manner.
* Returns true if there is enough resource quota available and allocation has succeeded, false
* otherwise.
* @param increment to add to current resource usage value and compare against configured max
* threshold.
*/
virtual bool tryAllocateResource(int64_t increment) PURE;
/**
* Tries to deallocate resource for given resource monitor in thread safe manner.
* Returns true if there is enough resource quota available and deallocation has succeeded, false
* otherwise.
* @param decrement to subtract from current resource usage value.
*/
virtual bool tryDeallocateResource(int64_t decrement) PURE;
/**
* Returns current resource usage tracked by monitor.
*/
virtual int64_t currentResourceUsage() const PURE;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Naive question: is the semantics of this to to block on that read or will implementors get the most recent read or?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Callers will get most recent read, added explanation to api docs.

/**
* Returns max resource usage configured in monitor.
*/
virtual uint64_t maxResourceUsage() const PURE;
};

using ProactiveResourceMonitorPtr = std::unique_ptr<ProactiveResourceMonitor>;

class ProactiveResource {
public:
ProactiveResource(const std::string& name, ProactiveResourceMonitorPtr monitor,
Stats::Scope& stats_scope)
: name_(name), monitor_(std::move(monitor)),
failed_updates_counter_(makeCounter(stats_scope, name, "failed_updates")) {}

bool tryAllocateResource(int64_t increment) {
if (monitor_->tryAllocateResource(increment)) {
return true;
} else {
failed_updates_counter_.inc();
return false;
}
}

bool tryDeallocateResource(int64_t decrement) {
if (monitor_->tryDeallocateResource(decrement)) {
return true;
} else {
failed_updates_counter_.inc();
return false;
}
}

int64_t currentResourceUsage() { return monitor_->currentResourceUsage(); }

private:
const std::string name_;
ProactiveResourceMonitorPtr monitor_;
Stats::Counter& failed_updates_counter_;

Stats::Counter& makeCounter(Stats::Scope& scope, absl::string_view a, absl::string_view b) {
Stats::StatNameManagedStorage stat_name(absl::StrCat("overload.", a, ".", b),
scope.symbolTable());
return scope.counterFromStatName(stat_name.statName());
}
};

} // namespace Server
} // namespace Envoy
47 changes: 47 additions & 0 deletions envoy/server/overload/thread_local_overload_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,34 @@
#include "envoy/common/pure.h"
#include "envoy/event/scaled_range_timer_manager.h"
#include "envoy/event/timer.h"
#include "envoy/server/overload/proactive_resource_monitor.h"
#include "envoy/thread_local/thread_local_object.h"

#include "source/common/common/interval_value.h"

namespace Envoy {
namespace Server {

enum class OverloadProactiveResourceName {
GlobalDownstreamMaxConnections,
};

class OverloadProactiveResourceNameValues {
public:
// Overload action to stop accepting new HTTP requests.
const std::string GlobalDownstreamMaxConnections =
"envoy.resource_monitors.global_downstream_max_connections";

std::set<std::string> proactive_resource_names_{GlobalDownstreamMaxConnections};

absl::flat_hash_map<std::string, OverloadProactiveResourceName>
proactive_action_name_to_resource_ = {
{GlobalDownstreamMaxConnections,
OverloadProactiveResourceName::GlobalDownstreamMaxConnections}};
};

using OverloadProactiveResourceNames = ConstSingleton<OverloadProactiveResourceNameValues>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The enum and this alias are almost the same apart from an 's'. Perhaps there's a better name?


/**
* Tracks the state of an overload action. The state is a number between 0 and 1 that represents the
* level of saturation. The values are categorized in two groups:
Expand Down Expand Up @@ -46,6 +67,32 @@ class ThreadLocalOverloadState : public ThreadLocal::ThreadLocalObject {
public:
// Get a thread-local reference to the value for the given action key.
virtual const OverloadActionState& getState(const std::string& action) PURE;
/**
* Invokes corresponding resource monitor to allocate resource for given resource monitor in
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Invokes the corresponding

same below

* thread safe manner. Returns true if there is enough resource quota available and allocation has
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in a thread safe manner.

Same below.

* succeeded, false if allocation failed or resource is not registered.
* @param name of corresponding resource monitor.
* @param increment to add to current resource usage value within monitor.
*/
virtual bool tryAllocateResource(OverloadProactiveResourceName resource_name,
int64_t increment) PURE;
/**
* Invokes corresponding resource monitor to deallocate resource for given resource monitor in
* thread safe manner. Returns true if there is enough resource quota available and deallocation
* has succeeded, false if deallocation failed or resource is not registered.
* @param name of corresponding resource monitor.
* @param decrement to subtract from current resource usage value within monitor.
*/
virtual bool tryDeallocateResource(OverloadProactiveResourceName resource_name,
int64_t decrement) PURE;

/**
* TODO(nezdolik) remove this method once downstream connection tracking is fully moved to
* overload manager. Checks if resource monitor is registered and resource usage tracking is
* enabled in overload manager. Returns true if resource monitor is registered, false otherwise.
* @param name of resource monitor to check.
*/
virtual bool isResourceMonitorEnabled(OverloadProactiveResourceName resource_name) PURE;
};

} // namespace Server
Expand Down
43 changes: 23 additions & 20 deletions envoy/server/resource_monitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

#include "envoy/common/exception.h"
#include "envoy/common/pure.h"
#include "envoy/event/dispatcher.h"

#include "source/common/common/assert.h"

namespace Envoy {
namespace Server {
Expand All @@ -18,36 +21,36 @@ struct ResourceUsage {
double resource_pressure_;
};

class ResourceMonitor {
/**
* Notifies caller of updated resource usage.
*/
class ResourceUpdateCallbacks {
public:
virtual ~ResourceMonitor() = default;
virtual ~ResourceUpdateCallbacks() = default;

/**
* Called when the request for updated resource usage succeeds.
* @param usage the updated resource usage
*/
virtual void onSuccess(const ResourceUsage& usage) PURE;

/**
* Notifies caller of updated resource usage.
* Called when the request for updated resource usage fails.
* @param error the exception caught when trying to get updated resource usage
*/
class Callbacks {
public:
virtual ~Callbacks() = default;

/**
* Called when the request for updated resource usage succeeds.
* @param usage the updated resource usage
*/
virtual void onSuccess(const ResourceUsage& usage) PURE;

/**
* Called when the request for updated resource usage fails.
* @param error the exception caught when trying to get updated resource usage
*/
virtual void onFailure(const EnvoyException& error) PURE;
};
virtual void onFailure(const EnvoyException& error) PURE;
};

class ResourceMonitor {
public:
virtual ~ResourceMonitor() = default;

/**
* Recalculate resource usage.
* This must be non-blocking so if RPCs need to be made they should be
* done asynchronously and invoke the callback when finished.
*/
virtual void updateResourceUsage(Callbacks& callbacks) PURE;
virtual void updateResourceUsage(ResourceUpdateCallbacks& callbacks) PURE;
};

using ResourceMonitorPtr = std::unique_ptr<ResourceMonitor>;
Expand Down
21 changes: 21 additions & 0 deletions envoy/server/resource_monitor_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "envoy/event/dispatcher.h"
#include "envoy/protobuf/message_validator.h"
#include "envoy/server/options.h"
#include "envoy/server/overload/proactive_resource_monitor.h"
#include "envoy/server/resource_monitor.h"

#include "source/common/protobuf/protobuf.h"
Expand Down Expand Up @@ -64,6 +65,26 @@ class ResourceMonitorFactory : public Config::TypedFactory {
std::string category() const override { return "envoy.resource_monitors"; }
};

class ProactiveResourceMonitorFactory : public Config::TypedFactory {
public:
~ProactiveResourceMonitorFactory() override = default;

/**
* Create a particular proactive resource monitor implementation.
* @param config const ProtoBuf::Message& supplies the config for the proactive resource monitor
* implementation.
* @param context ProactiveResourceMonitorFactoryContext& supplies the resource monitor's context.
* @return ProactiveResourceMonitorPtr the resource monitor instance. Should not be nullptr.
* @throw EnvoyException if the implementation is unable to produce an instance with
* the provided parameters.
*/
virtual ProactiveResourceMonitorPtr
createProactiveResourceMonitor(const Protobuf::Message& config,
ResourceMonitorFactoryContext& context) PURE;

std::string category() const override { return "envoy.resource_monitors"; }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whats the difference between this factory and the resource monitor factory? (Sorry if I'm being dense, they just look the same and I wonder whether they could be collapsed) Thanks

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only difference is the return type for newly created monitor. We could have had instead two methods within ResourceMonitorFactory to create regular and proactive resource monitor. The downside there would be that each resource monitor type config factory will always need to provide implementation for both methods and one of them will always be "unimplemented". I see this as more evil (mode code duplication) that duplicating factory class here.

};

} // namespace Configuration
} // namespace Server
} // namespace Envoy
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ FixedHeapMonitor::FixedHeapMonitor(
ASSERT(max_heap_ > 0);
}

void FixedHeapMonitor::updateResourceUsage(Server::ResourceMonitor::Callbacks& callbacks) {
void FixedHeapMonitor::updateResourceUsage(Server::ResourceUpdateCallbacks& callbacks) {
const size_t physical = stats_->reservedHeapBytes();
const size_t unmapped = stats_->unmappedHeapBytes();
ASSERT(physical >= unmapped);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class FixedHeapMonitor : public Server::ResourceMonitor {
const envoy::extensions::resource_monitors::fixed_heap::v3::FixedHeapConfig& config,
std::unique_ptr<MemoryStatsReader> stats = std::make_unique<MemoryStatsReader>());

void updateResourceUsage(Server::ResourceMonitor::Callbacks& callbacks) override;
void updateResourceUsage(Server::ResourceUpdateCallbacks& callbacks) override;

private:
const uint64_t max_heap_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ InjectedResourceMonitor::InjectedResourceMonitor(

void InjectedResourceMonitor::onFileChanged() { file_changed_ = true; }

void InjectedResourceMonitor::updateResourceUsage(Server::ResourceMonitor::Callbacks& callbacks) {
void InjectedResourceMonitor::updateResourceUsage(Server::ResourceUpdateCallbacks& callbacks) {
if (file_changed_) {
file_changed_ = false;
TRY_ASSERT_MAIN_THREAD {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class InjectedResourceMonitor : public Server::ResourceMonitor {
Server::Configuration::ResourceMonitorFactoryContext& context);

// Server::ResourceMonitor
void updateResourceUsage(Server::ResourceMonitor::Callbacks& callbacks) override;
void updateResourceUsage(Server::ResourceUpdateCallbacks& callbacks) override;

protected:
virtual void onFileChanged();
Expand Down
3 changes: 3 additions & 0 deletions source/server/admin/admin.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ class AdminImpl : public Admin,
struct NullThreadLocalOverloadState : public ThreadLocalOverloadState {
NullThreadLocalOverloadState(Event::Dispatcher& dispatcher) : dispatcher_(dispatcher) {}
const OverloadActionState& getState(const std::string&) override { return inactive_; }
bool tryAllocateResource(OverloadProactiveResourceName, int64_t) override { return false; }
bool tryDeallocateResource(OverloadProactiveResourceName, int64_t) override { return false; }
bool isResourceMonitorEnabled(OverloadProactiveResourceName) override { return false; }
Event::Dispatcher& dispatcher_;
const OverloadActionState inactive_ = OverloadActionState::inactive();
};
Expand Down
Loading