load balancer: Add an option to configure the table size of maglev (#1…

…2870) Currently, the maglev hash algorithm defaults to a table size of 65537. This is the size recommended by a paper, but it is better if the user can set this value. This patch introduces a new MaglevLbConfig that contains the table size of maglev, so the user can now set the table size of maglev to suit their situation. Signed-off-by: DongRyeol Cha <[email protected]>
envoyproxy · Sep 3, 2020 · 5fd73ca · 5fd73ca
1 parent f95b390
commit 5fd73ca
Show file tree

Hide file tree

Showing 18 changed files with 183 additions and 64 deletions.
diff --git a/api/envoy/config/cluster/v3/cluster.proto b/api/envoy/config/cluster/v3/cluster.proto
@@ -43,7 +43,7 @@ message ClusterCollection {
 }
 
 // Configuration for a single upstream cluster.
-// [#next-free-field: 52]
+// [#next-free-field: 53]
 message Cluster {
   option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.Cluster";
 
@@ -410,6 +410,16 @@ message Cluster {
     google.protobuf.UInt64Value maximum_ring_size = 4 [(validate.rules).uint64 = {lte: 8388608}];
   }
 
+  // Specific configuration for the :ref:`Maglev<arch_overview_load_balancing_types_maglev>`
+  // load balancing policy.
+  message MaglevLbConfig {
+    // The table size for Maglev hashing. Maglev aims for "minimal disruption" rather than an absolute guarantee.
+    // Minimal disruption means that when the set of upstreams changes, a connection will likely be sent to the same
+    // upstream as it was before. Increasing the table size reduces the amount of disruption.
+    // The table size must be a prime number. If it is not specified, the default is 65537.
+    google.protobuf.UInt64Value table_size = 1;
+  }
+
   // Specific configuration for the
   // :ref:`Original Destination <arch_overview_load_balancing_types_original_destination>`
   // load balancing policy.
@@ -830,15 +840,19 @@ message Cluster {
 
   // Optional configuration for the load balancing algorithm selected by
   // LbPolicy. Currently only
-  // :ref:`RING_HASH<envoy_api_enum_value_config.cluster.v3.Cluster.LbPolicy.RING_HASH>` and
+  // :ref:`RING_HASH<envoy_api_enum_value_config.cluster.v3.Cluster.LbPolicy.RING_HASH>`,
+  // :ref:`MAGLEV<envoy_api_enum_value_config.cluster.v3.Cluster.LbPolicy.MAGLEV>` and
   // :ref:`LEAST_REQUEST<envoy_api_enum_value_config.cluster.v3.Cluster.LbPolicy.LEAST_REQUEST>`
   // has additional configuration options.
-  // Specifying ring_hash_lb_config or least_request_lb_config without setting the corresponding
+  // Specifying ring_hash_lb_config, maglev_lb_config, or least_request_lb_config without setting the corresponding
   // LbPolicy will generate an error at runtime.
   oneof lb_config {
     // Optional configuration for the Ring Hash load balancing policy.
     RingHashLbConfig ring_hash_lb_config = 23;
 
+    // Optional configuration for the Maglev load balancing policy.
+    MaglevLbConfig maglev_lb_config = 52;
+
     // Optional configuration for the Original Destination load balancing policy.
     OriginalDstLbConfig original_dst_lb_config = 34;
 

diff --git a/api/envoy/config/cluster/v4alpha/cluster.proto b/api/envoy/config/cluster/v4alpha/cluster.proto
diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst
@@ -74,6 +74,7 @@ New Features
 * http: added support for :ref:`%DOWNSTREAM_PEER_FINGERPRINT_1% <config_http_conn_man_headers_custom_request_headers>` as custom header.
 * http: added :ref:`allow_chunked_length <envoy_v3_api_field_config.core.v3.Http1ProtocolOptions.allow_chunked_length>` configuration option for HTTP/1 codec to allow processing requests/responses with both Content-Length and Transfer-Encoding: chunked headers. If such message is served and option is enabled - per RFC Content-Length is ignored and removed.
 * http: introduced new HTTP/1 and HTTP/2 codec implementations that will remove the use of exceptions for control flow due to high risk factors and instead use error statuses. The old behavior is used by default, but the new codecs can be enabled for testing by setting the runtime feature `envoy.reloadable_features.new_codec_behavior` to true. The new codecs will be in development for one month, and then enabled by default while the old codecs are deprecated.
+* load balancer: added :ref:`MaglevLbConfig<envoy_v3_api_msg_config.cluster.v3.Cluster.MaglevLbConfig>` to configure the table size of Maglev consistent hash.
 * load balancer: added a :ref:`configuration<envoy_v3_api_msg_config.cluster.v3.Cluster.LeastRequestLbConfig>` option to specify the active request bias used by the least request load balancer.
 * load balancer: added an :ref:`option <envoy_v3_api_field_config.cluster.v3.Cluster.LbSubsetConfig.LbSubsetSelector.single_host_per_subset>` to optimize subset load balancing when there is only one host per subset.
 * load balancer: added support for bounded load per host for consistent hash load balancers via :ref:`hash_balance_factor <envoy_api_field_Cluster.CommonLbConfig.consistent_hashing_lb_config>`.

diff --git a/generated_api_shadow/envoy/config/cluster/v3/cluster.proto b/generated_api_shadow/envoy/config/cluster/v3/cluster.proto
diff --git a/generated_api_shadow/envoy/config/cluster/v4alpha/cluster.proto b/generated_api_shadow/envoy/config/cluster/v4alpha/cluster.proto
diff --git a/include/envoy/upstream/upstream.h b/include/envoy/upstream/upstream.h
@@ -812,6 +812,12 @@ class ClusterInfo {
   virtual const absl::optional<envoy::config::cluster::v3::Cluster::RingHashLbConfig>&
   lbRingHashConfig() const PURE;
 
+  /**
+   * @return configuration for maglev load balancing, only used if type is set to maglev_lb.
+   */
+  virtual const absl::optional<envoy::config::cluster::v3::Cluster::MaglevLbConfig>&
+  lbMaglevConfig() const PURE;
+
   /**
    * @return const absl::optional<envoy::config::cluster::v3::Cluster::OriginalDstLbConfig>& the
    * configuration for the Original Destination load balancing policy, only used if type is set to

diff --git a/source/common/upstream/cluster_manager_impl.cc b/source/common/upstream/cluster_manager_impl.cc
@@ -776,7 +776,7 @@ void ClusterManagerImpl::loadCluster(const envoy::config::cluster::v3::Cluster&
       cluster_entry_it->second->thread_aware_lb_ = std::make_unique<MaglevLoadBalancer>(
           cluster_reference.prioritySet(), cluster_reference.info()->stats(),
           cluster_reference.info()->statsScope(), runtime_, random_,
-          cluster_reference.info()->lbConfig());
+          cluster_reference.info()->lbMaglevConfig(), cluster_reference.info()->lbConfig());
     }
   } else if (cluster_reference.info()->lbType() == LoadBalancerType::ClusterProvided) {
     cluster_entry_it->second->thread_aware_lb_ = std::move(new_cluster_pair.second);
@@ -1240,8 +1240,8 @@ ClusterManagerImpl::ThreadLocalClusterManagerImpl::ClusterEntry::ClusterEntry(
     lb_ = std::make_unique<SubsetLoadBalancer>(
         cluster->lbType(), priority_set_, parent_.local_priority_set_, cluster->stats(),
         cluster->statsScope(), parent.parent_.runtime_, parent.parent_.random_,
-        cluster->lbSubsetInfo(), cluster->lbRingHashConfig(), cluster->lbLeastRequestConfig(),
-        cluster->lbConfig());
+        cluster->lbSubsetInfo(), cluster->lbRingHashConfig(), cluster->lbMaglevConfig(),
+        cluster->lbLeastRequestConfig(), cluster->lbConfig());
   } else {
     switch (cluster->lbType()) {
     case LoadBalancerType::LeastRequest: {

diff --git a/source/common/upstream/maglev_lb.cc b/source/common/upstream/maglev_lb.cc
@@ -9,13 +9,6 @@ MaglevTable::MaglevTable(const NormalizedHostWeightVector& normalized_host_weigh
                          double max_normalized_weight, uint64_t table_size,
                          bool use_hostname_for_hashing, MaglevLoadBalancerStats& stats)
     : table_size_(table_size), stats_(stats) {
-  // TODO(mattklein123): The Maglev table must have a size that is a prime number for the algorithm
-  // to work. Currently, the table size is not user configurable. In the future, if the table size
-  // is made user configurable, we will need proper error checking that the user cannot configure a
-  // size that is not prime (the result is going to be an infinite loop with some inputs which is
-  // not good!).
-  ASSERT(Primes::isPrime(table_size));
-
   // We can't do anything sensible with no hosts.
   if (normalized_host_weights.empty()) {
     return;
@@ -102,16 +95,25 @@ uint64_t MaglevTable::permutation(const TableBuildEntry& entry) {
 MaglevLoadBalancer::MaglevLoadBalancer(
     const PrioritySet& priority_set, ClusterStats& stats, Stats::Scope& scope,
     Runtime::Loader& runtime, Random::RandomGenerator& random,
-    const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config, uint64_t table_size)
+    const absl::optional<envoy::config::cluster::v3::Cluster::MaglevLbConfig>& config,
+    const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config)
     : ThreadAwareLoadBalancerBase(priority_set, stats, runtime, random, common_config),
       scope_(scope.createScope("maglev_lb.")), stats_(generateStats(*scope_)),
-      table_size_(table_size),
+      table_size_(config ? PROTOBUF_GET_WRAPPED_OR_DEFAULT(config.value(), table_size,
+                                                           MaglevTable::DefaultTableSize)
+                         : MaglevTable::DefaultTableSize),
       use_hostname_for_hashing_(
           common_config.has_consistent_hashing_lb_config()
               ? common_config.consistent_hashing_lb_config().use_hostname_for_hashing()
               : false),
       hash_balance_factor_(PROTOBUF_GET_WRAPPED_OR_DEFAULT(
-          common_config.consistent_hashing_lb_config(), hash_balance_factor, 0)) {}
+          common_config.consistent_hashing_lb_config(), hash_balance_factor, 0)) {
+  ENVOY_LOG(debug, "maglev table size: {}", table_size_);
+  // The table size must be a prime number.
+  if (!Primes::isPrime(table_size_)) {
+    throw EnvoyException("The table size of maglev must be prime number");
+  }
+}
 
 MaglevLoadBalancerStats MaglevLoadBalancer::generateStats(Stats::Scope& scope) {
   return {ALL_MAGLEV_LOAD_BALANCER_STATS(POOL_GAUGE(scope))};

diff --git a/source/common/upstream/maglev_lb.h b/source/common/upstream/maglev_lb.h
@@ -68,14 +68,17 @@ class MaglevTable : public ThreadAwareLoadBalancerBase::HashingLoadBalancer,
 /**
  * Thread aware load balancer implementation for Maglev.
  */
-class MaglevLoadBalancer : public ThreadAwareLoadBalancerBase {
+class MaglevLoadBalancer : public ThreadAwareLoadBalancerBase,
+                           Logger::Loggable<Logger::Id::upstream> {
 public:
-  MaglevLoadBalancer(const PrioritySet& priority_set, ClusterStats& stats, Stats::Scope& scope,
-                     Runtime::Loader& runtime, Random::RandomGenerator& random,
-                     const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config,
-                     uint64_t table_size = MaglevTable::DefaultTableSize);
+  MaglevLoadBalancer(
+      const PrioritySet& priority_set, ClusterStats& stats, Stats::Scope& scope,
+      Runtime::Loader& runtime, Random::RandomGenerator& random,
+      const absl::optional<envoy::config::cluster::v3::Cluster::MaglevLbConfig>& config,
+      const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config);
 
   const MaglevLoadBalancerStats& stats() const { return stats_; }
+  uint64_t tableSize() const { return table_size_; }
 
 private:
   // ThreadAwareLoadBalancerBase

diff --git a/source/common/upstream/subset_lb.cc b/source/common/upstream/subset_lb.cc
@@ -25,12 +25,14 @@ SubsetLoadBalancer::SubsetLoadBalancer(
     Random::RandomGenerator& random, const LoadBalancerSubsetInfo& subsets,
     const absl::optional<envoy::config::cluster::v3::Cluster::RingHashLbConfig>&
         lb_ring_hash_config,
+    const absl::optional<envoy::config::cluster::v3::Cluster::MaglevLbConfig>& lb_maglev_config,
     const absl::optional<envoy::config::cluster::v3::Cluster::LeastRequestLbConfig>&
         least_request_config,
     const envoy::config::cluster::v3::Cluster::CommonLbConfig& common_config)
     : lb_type_(lb_type), lb_ring_hash_config_(lb_ring_hash_config),
-      least_request_config_(least_request_config), common_config_(common_config), stats_(stats),
-      scope_(scope), runtime_(runtime), random_(random), fallback_policy_(subsets.fallbackPolicy()),
+      lb_maglev_config_(lb_maglev_config), least_request_config_(least_request_config),
+      common_config_(common_config), stats_(stats), scope_(scope), runtime_(runtime),
+      random_(random), fallback_policy_(subsets.fallbackPolicy()),
       default_subset_metadata_(subsets.defaultSubset().fields().begin(),
                                subsets.defaultSubset().fields().end()),
       subset_selectors_(subsets.subsetSelectors()), original_priority_set_(priority_set),
@@ -769,7 +771,7 @@ SubsetLoadBalancer::PrioritySubsetImpl::PrioritySubsetImpl(const SubsetLoadBalan
     // can also use a thread aware sub-LB properly. The following works fine but is not optimal.
     thread_aware_lb_ = std::make_unique<MaglevLoadBalancer>(
         *this, subset_lb.stats_, subset_lb.scope_, subset_lb.runtime_, subset_lb.random_,
-        subset_lb.common_config_);
+        subset_lb.lb_maglev_config_, subset_lb.common_config_);
     thread_aware_lb_->initialize();
     lb_ = thread_aware_lb_->factory()->create();
     break;