Introduce Least Request LB active request bias config #11252
Documentation diff:

```diff
@@ -41,11 +41,25 @@ same or different weights.
   less than or equal to all of the other hosts.
 * *all weights not equal*: If two or more hosts in the cluster have different load balancing
   weights, the load balancer shifts into a mode where it uses a weighted round robin schedule in
-  which weights are dynamically adjusted based on the host's request load at the time of selection
-  (weight is divided by the current active request count. For example, a host with weight 2 and an
-  active request count of 4 will have a synthetic weight of 2 / 4 = 0.5). This algorithm provides
-  good balance at steady state but may not adapt to load imbalance as quickly. Additionally, unlike
-  P2C, a host will never truly drain, though it will receive fewer requests over time.
+  which weights are dynamically adjusted based on the host's request load at the time of selection.
+
+  In this case the weights are calculated at the time a host is picked using the following formula:
+
+  `weight = load_balancing_weight / (active_requests + 1)^active_request_bias`.
+
+  :ref:`active_request_bias<envoy_v3_api_field_config.cluster.v3.Cluster.LeastRequestLbConfig.active_request_bias>`
+  can be configured via runtime and defaults to 1.0. It must be greater than or equal to 0.0.
+
+  The larger the active request bias is, the more aggressively active requests will lower the
+  effective weight.
+
+  If `active_request_bias` is set to 0.0, the least request load balancer behaves like the round
+  robin load balancer and ignores the active request count at the time of picking.
+
+  For example, if active_request_bias is 1.0, a host with weight 2 and an active request count of 4
+  will have an effective weight of 2 / (4 + 1)^1 = 0.4. This algorithm provides good balance at
+  steady state but may not adapt to load imbalance as quickly. Additionally, unlike P2C, a host will
+  never truly drain, though it will receive fewer requests over time.
+
 .. _arch_overview_load_balancing_types_ring_hash:
```

Review thread on the round robin fallback paragraph:

> Might want to make it clear that this only happens if weights are set for various endpoints.

> @tonya11en this whole section talks about what happens when weights are not equal (the section starts with *all weights not equal*). Do you think we should still add a clarification or would it be redundant?

> oh, it's redundant
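To make the effective-weight formula above concrete, here is a standalone C++ sketch. It is not part of the PR; the function name and the sample weight, request count, and bias values are invented for illustration:

```cpp
#include <cmath>
#include <cstdio>

// Effective weight as described in the docs:
// weight = load_balancing_weight / (active_requests + 1)^active_request_bias
double effectiveWeight(double load_balancing_weight, unsigned active_requests,
                       double active_request_bias) {
  return load_balancing_weight / std::pow(active_requests + 1, active_request_bias);
}

int main() {
  // The example from the docs: weight 2, 4 active requests, bias 1.0 -> 0.4.
  std::printf("%.2f\n", effectiveWeight(2, 4, 1.0)); // 0.40
  // Bias 0.0 ignores active requests entirely (round robin behavior).
  std::printf("%.2f\n", effectiveWeight(2, 4, 0.0)); // 2.00
  // A larger bias penalizes loaded hosts more aggressively.
  std::printf("%.2f\n", effectiveWeight(2, 4, 2.0)); // 0.08
  return 0;
}
```

Note how a bias of 0.0 reduces the formula to the plain configured weight, which matches the docs' claim that the balancer then behaves like round robin.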
Header diff (load balancer implementation):

```diff
@@ -1,6 +1,8 @@
 #pragma once

+#include <cmath>
 #include <cstdint>
+#include <memory>
 #include <queue>
 #include <set>
 #include <vector>

@@ -11,6 +13,7 @@
 #include "envoy/upstream/upstream.h"

 #include "common/protobuf/utility.h"
+#include "common/runtime/runtime_protos.h"
 #include "common/upstream/edf_scheduler.h"

 namespace Envoy {

@@ -367,6 +370,8 @@ class EdfLoadBalancerBase : public ZoneAwareLoadBalancerBase {

   void initialize();

+  virtual void refresh(uint32_t priority);
+
   // Seed to allow us to desynchronize load balancers across a fleet. If we don't
   // do this, multiple Envoys that receive an update at the same time (or even
   // multiple load balancers on the same host) will send requests to

@@ -375,7 +380,6 @@ class EdfLoadBalancerBase : public ZoneAwareLoadBalancerBase {
   const uint64_t seed_;

 private:
-  void refresh(uint32_t priority);
   virtual void refreshHostSource(const HostsSource& source) PURE;
   virtual double hostWeight(const Host& host) PURE;
   virtual HostConstSharedPtr unweightedHostPick(const HostVector& hosts_to_use,
```
```diff
@@ -437,7 +441,8 @@ class RoundRobinLoadBalancer : public EdfLoadBalancerBase {
  * The benefit of the Maglev table is at the expense of resolution, memory usage is capped.
  * Additionally, the Maglev table can be shared amongst all threads.
  */
-class LeastRequestLoadBalancer : public EdfLoadBalancerBase {
+class LeastRequestLoadBalancer : public EdfLoadBalancerBase,
+                                 Logger::Loggable<Logger::Id::upstream> {
 public:
   LeastRequestLoadBalancer(
       const PrioritySet& priority_set, const PrioritySet* local_priority_set, ClusterStats& stats,
```

Review thread on the added `Logger::Loggable` base:

> I wonder if you save some bytes by using ENVOY_LOG_MISC here rather than adding a second inheritance here, or by putting the …

> I just tried using …

> one quick point about that: the EXPECT_MEMORY_EQ macro will be a no-op on Mac. It only runs on linux release builds. There's a bazel flag controlling an ifdef whether to enable that check. EXPECT_MEMORY_LE will run on Mac if tcmalloc is being used, but the memory checking for non-canonical platforms is much looser, and so you might not notice a benefit from dropping the multiple inheritance on Mac.
```diff
@@ -450,26 +455,71 @@ class LeastRequestLoadBalancer : public EdfLoadBalancerBase {
         choice_count_(
             least_request_config.has_value()
                 ? PROTOBUF_GET_WRAPPED_OR_DEFAULT(least_request_config.value(), choice_count, 2)
-                : 2) {
+                : 2),
+        active_request_bias_runtime_(
+            least_request_config.has_value() && least_request_config->has_active_request_bias()
+                ? std::make_unique<Runtime::Double>(least_request_config->active_request_bias(),
+                                                    runtime)
+                : nullptr) {
     initialize();
   }
```

Review thread on the `active_request_bias_runtime_` initializer:

> also test for the value differing from the default?

> Do you mean we should initialize … ? This would prevent users from making the bias overridable via runtime while still defaulting it to …

> Didn't mean to limit functionality. I'm confused though if the StatsIntegrationTest flow (a) creates this LB, which is not the default, and (b) overrides this new field. Otherwise I'd expect the overhead to be no more than 64 bytes. How did we get 256 byte overhead per cluster with the default setup?

> Sorry, got confused. I think this extra optional field, stored as a unique pointer, should only cost 8 bytes per cluster if it's being used. Do we think it's worth investigating why the overhead is 256 per cluster? Maybe we'll decide it's needed but it'd be great to understand why.

> The overhead was 256 bytes before switching to unique_ptr. Now it's 8 bytes.

> GitHub is giving me a confusing view; from https://github.com/envoyproxy/envoy/pull/11252/files this is what I see at the bottom of that web-page: Left-hand-side: … Do you see something different?

> The overhead is still 256 bytes =/. I just tried making …

> So my suspicion is that it's actually creating the optional object for some reason. Maybe it's worth throwing some log statements or firing up the debugger to check?

> @jmarantz I reverted the changes to … My guess is that the overhead comes from the protobuf message descriptor, but I wouldn't expect each cluster to hold a copy of it. What do you think?
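For context on the `Runtime::Double` helper used in the initializer, here is a rough standalone sketch of the pattern it implements: a proto-supplied default plus a runtime key consulted at read time. This is an illustration of the concept, not Envoy's actual implementation, and all names in it are invented:

```cpp
#include <functional>
#include <string>

// Illustrative stand-in for a runtime snapshot lookup: returns the override
// registered under `key` if one is set, otherwise `default_value`.
using RuntimeLookup = std::function<double(const std::string& key, double default_value)>;

// Rough sketch of a Runtime::Double-like wrapper: it stores the configured
// default and the runtime key, and consults the runtime on every value() call.
// The string member is one reason it costs more than a plain double.
class RuntimeDoubleSketch {
public:
  RuntimeDoubleSketch(double default_value, std::string runtime_key, RuntimeLookup lookup)
      : default_value_(default_value), runtime_key_(std::move(runtime_key)),
        lookup_(std::move(lookup)) {}

  double value() const { return lookup_(runtime_key_, default_value_); }
  const std::string& runtimeKey() const { return runtime_key_; }

private:
  const double default_value_;
  const std::string runtime_key_;
  const RuntimeLookup lookup_;
};
```

This is also why the PR caches the looked-up value instead of calling `value()` on every host pick: the lookup happens once per refresh rather than once per request.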
```diff
+ protected:
+  void refresh(uint32_t priority) override {
+    active_request_bias_ =
+        active_request_bias_runtime_ != nullptr ? active_request_bias_runtime_->value() : 1.0;
+
+    if (active_request_bias_ < 0.0) {
+      ENVOY_LOG(warn, "upstream: invalid active request bias supplied (runtime key {}), using 1.0",
+                active_request_bias_runtime_->runtimeKey());
+      active_request_bias_ = 1.0;
+    }
+
+    EdfLoadBalancerBase::refresh(priority);
+  }
```

Review thread on the warning log:

> Might want to also validate it shows the log message in your unit test. Something like: … If for some reason TSAN builds start failing after you add that test, protect this thing with a mutex and that should fix it: envoy/test/test_common/logging.h, line 61 in 841ad99.

> Arguably this could also be a stat. Generally I think we prefer having stats to accompany warnings as logs aren't necessarily tied to operations alerting. OTOH, this particular case is probably going to be pretty rare/unlikely if ever, so I don't think we need to do a stat.
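The test snippet suggested in the first comment was elided from the page. As a hedged guess at its shape: Envoy's `test/test_common/logging.h` provides an `EXPECT_LOG_CONTAINS(level, substring, expression)` macro, so the suggestion was presumably along these lines. The fixture and the way the refresh is triggered here are hypothetical, not taken from the PR:

```cpp
// Hypothetical test fragment: `hostSet()` and the empty-update trigger are
// illustrative stand-ins for however the test forces LeastRequestLoadBalancer
// to run refresh() and hit the invalid-bias branch.
EXPECT_LOG_CONTAINS("warn", "invalid active request bias supplied",
                    hostSet().runCallbacks({}, {}));
```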
```diff
 private:
   void refreshHostSource(const HostsSource&) override {}
   double hostWeight(const Host& host) override {
-    // Here we scale host weight by the number of active requests at the time we do the pick. We
-    // always add 1 to avoid division by 0. It might be possible to do better by picking two hosts
-    // off of the schedule, and selecting the one with fewer active requests at the time of
-    // selection.
-    // TODO(mattklein123): @htuch brings up the point that how we are scaling weight here might not
-    // be the only/best way of doing this. Essentially, it makes weight and active requests equally
-    // important. Are they equally important in practice? There is no right answer here and we might
-    // want to iterate on this as we gain more experience.
-    return static_cast<double>(host.weight()) / (host.stats().rq_active_.value() + 1);
+    // This method is called to calculate the dynamic weight as following when all load balancing
+    // weights are not equal:
+    //
+    // `weight = load_balancing_weight / (active_requests + 1)^active_request_bias`
+    //
+    // `active_request_bias` can be configured via runtime and its value is cached in
+    // `active_request_bias_` to avoid having to do a runtime lookup each time a host weight is
+    // calculated.
+    //
+    // When `active_request_bias == 0.0` we behave like `RoundRobinLoadBalancer` and return the
+    // host weight without considering the number of active requests at the time we do the pick.
+    //
+    // When `active_request_bias > 0.0` we scale the host weight by the number of active
+    // requests at the time we do the pick. We always add 1 to avoid division by 0.
+    //
+    // It might be possible to do better by picking two hosts off of the schedule, and selecting
+    // the one with fewer active requests at the time of selection.
+    if (active_request_bias_ == 0.0) {
+      return host.weight();
+    }
+
+    if (active_request_bias_ == 1.0) {
+      return static_cast<double>(host.weight()) / (host.stats().rq_active_.value() + 1);
+    }
+
+    return static_cast<double>(host.weight()) /
+           std::pow(host.stats().rq_active_.value() + 1, active_request_bias_);
   }
```

Review thread on the exact comparison `active_request_bias_ == 1.0`:

> Is this just an optimization? I'm not a C++ floating point expert, but generally prefer to avoid comparing floats for exact value, since the representation might not allow for precise representation, so comparing with epsilon ranges is better. Probably safe for 1.0 though?

> Yes, this is just an optimization to avoid having to call … This field isn't set with the result of a calculation, so I don't think there is much risk of float representation problems. I expect most users to not specify any bias, so … Users can override this value via runtime, but since no calculation is done, they should be able to easily set it to exactly …
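A standalone aside, not from the PR, illustrating the floating point point made in that thread: values assigned directly, like 0.0 and 1.0, round-trip exactly and compare safely, while computed values may not:

```cpp
#include <cstdio>

int main() {
  // Assigned literals are represented exactly: this comparison is safe.
  double bias = 1.0;
  std::printf("%d\n", bias == 1.0); // 1

  // Computed values can pick up representation error: this is why exact
  // comparison of calculation results is discouraged.
  double sum = 0.1 + 0.2;
  std::printf("%d\n", sum == 0.3); // 0 on typical IEEE-754 doubles
  return 0;
}
```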
```diff
   HostConstSharedPtr unweightedHostPick(const HostVector& hosts_to_use,
                                         const HostsSource& source) override;

   const uint32_t choice_count_;

+  // The exponent used to calculate host weights can be configured via runtime. We cache it for
+  // performance reasons and refresh it in `LeastRequestLoadBalancer::refresh(uint32_t priority)`
+  // whenever a `HostSet` is updated.
+  double active_request_bias_{};
+
+  const std::unique_ptr<Runtime::Double> active_request_bias_runtime_;
 };

 /**
```

Review thread on the two new members:

> oh there's these 8 bytes too.

> Sorry; late to the party here but would also be happy if this is a follow-up. This is the only additional memory field, right? This is per-cluster and also replicated per-thread (I don't have a clear picture of the topology of the LB structures in my head yet).

> @jmarantz these are the only two additional fields. So in theory this change should only use 128 bits of overhead if no bias is specified. However it looks like 256 are being added. I wonder if that might be caused by padding or something like that?
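A standalone sanity check, not from the PR, of the arithmetic in that thread: the two new members should add 16 bytes, i.e. 128 bits, per load balancer instance on a typical 64-bit platform, before any padding elsewhere in the class:

```cpp
#include <cstdio>
#include <memory>

// Stand-in for Runtime::Double; only the pointer to it is counted as a member.
struct RuntimeDoubleStandIn {};

struct NewMembers {
  double active_request_bias{};                                       // 8 bytes
  std::unique_ptr<RuntimeDoubleStandIn> active_request_bias_runtime;  // 8 bytes
};

int main() {
  // Typically prints 16: two 8-byte fields with no padding between them.
  std::printf("%zu\n", sizeof(NewMembers));
  return 0;
}
```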
Review thread on the use of `Runtime::Double` (per-cluster memory bloat):

> So this must be the cause of the bloat.
>
> I'm somewhat familiar with the Runtime infrastructure but not deeply so. If we have 10k clusters, would you runtime-override the active_request_bias for a subset of them? Or is the runtime override supposed to be a kill-switch to broadly disable a feature?
>
> Maybe we are paying a lot for functionality that we don't need? WDYT?
>
> BTW I'm fine if we want to submit this for now given the benefits, and then consider how we can better represent the flexibility we want from the Runtime system. @alyssawilk would be curious for your take here. Thank you!

> Captured this question in #11872

> not sure if this addresses the problem but I'd encourage using xds for changing per-cluster values rather than using runtime overrides. IMO runtime changes make some sense for global things which you want fast changes on (DoS protection knobs) but something like this could be done by CDS.

> I just pushed a commit that uses `double` instead of `RuntimeDouble` to see whether not using `RuntimeDouble` reduces the memory overhead. I'm waiting for the CI results to see how things go on Linux, but it did not help on macOS.
>
> As far as I can tell, the helper used by the memory usage tests to create clusters doesn't create any instances of the `LeastRequestLbConfig` message (envoy/test/integration/stats_integration_test.cc, lines 173 to 203 in e71e4dc), so I don't understand why the per-cluster memory footprint increases.

> The new CI result shows that the per-cluster memory footprint of the integration tests using `double` is the same as using `RuntimeDouble`. `RuntimeDouble` does contain a string (`runtime_key`), so I do expect it to use a little more memory for clusters that specify a bias.
>
> Making it runtime overridable would make it easier to change the bias without having to update the xDS server. But any of the approaches would be enough to improve the load balancing behavior during squeeze (fka red line) tests at Lyft, so I don't have a very strong opinion towards any of them.
>
> @tonya11en and @mattklein123 said in this Slack thread that they preferred to use `RuntimeDouble`, but we were only considering the potential performance overhead and not the memory overhead.
>
> @jmarantz / @alyssawilk: given the new data, do you still believe that it would be better not to use `RuntimeDouble`?