Skip to content

Commit

Permalink
Allow duplicate weighted backend keys (#3319)
Browse files Browse the repository at this point in the history
Allow duplicate weighted backend keys

Currently, if the proxy receives two backends with the same metadata,
one of the backends will get dropped because the backend metadata is
used as the key in a hash map. Attempting to then randomly distribute
requests to the backends can panic when selecting the now non-existent
backend.

This is fixed by no longer using backend metadata as a hash map key,
instead generating separate IDs that are stored in a vec to retain the
declared order of backends while also being used to look up the backend
and associated weight independently.

Validated with new unit tests exercising duplicate backend keys, as well
as a few around the invariants used to store the backends.

Signed-off-by: Scott Fleener <[email protected]>
  • Loading branch information
sfleen authored Nov 7, 2024
1 parent db64c7a commit b34d32b
Show file tree
Hide file tree
Showing 9 changed files with 570 additions and 227 deletions.
1 change: 0 additions & 1 deletion Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1564,7 +1564,6 @@ name = "linkerd-distribute"
version = "0.1.0"
dependencies = [
"ahash",
"indexmap 2.6.0",
"linkerd-stack",
"parking_lot",
"rand",
Expand Down
1 change: 0 additions & 1 deletion linkerd/distribute/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ publish = false

[dependencies]
ahash = "0.8"
indexmap = "2"
linkerd-stack = { path = "../stack" }
parking_lot = "0.12"
rand = { version = "0.8", features = ["small_rng"] }
Expand Down
163 changes: 163 additions & 0 deletions linkerd/distribute/src/keys.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
use ahash::{HashMap, HashMapExt};
use rand::{
distributions::{WeightedError, WeightedIndex},
prelude::Distribution as _,
Rng,
};
use std::hash::Hash;

/// Uniquely identifies a key/backend pair for a distribution. This allows
/// backends to have the same key and still participate in request distribution.
///
/// An ID is only meaningful to the [`ServiceKeys`] instance that produced it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct KeyId {
/// Position of the entry in the iterator its [`ServiceKeys`] was built from.
/// The same value addresses the entry's weight in the sampling index.
idx: usize,
}

/// An ordered collection of backend keys, each addressed by a [`KeyId`].
///
/// Entries are looked up by ID rather than by key value, so two entries may
/// hold equal keys without one replacing the other.
#[derive(Debug)]
pub struct ServiceKeys<K> {
/// IDs in the order the keys were supplied; this defines iteration order.
ids: Vec<KeyId>,
/// The key stored for each ID listed in `ids`.
keys: HashMap<KeyId, K>,
}

/// A [`ServiceKeys`] whose entries pair each key with a selection weight.
pub type WeightedServiceKeys<K> = ServiceKeys<WeightedKey<K>>;

/// A backend key paired with the weight used for weighted random selection.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct WeightedKey<K> {
/// The backend key.
pub key: K,
/// Relative weight given to this entry when the sampling index is built.
pub weight: u32,
}

/// Samples [`KeyId`]s from a [`WeightedServiceKeys`] according to their weights.
///
/// The selector holds its own sampling index, so weights can be zeroed out
/// through it without modifying the borrowed keys.
pub(crate) struct WeightedKeySelector<'a, K> {
/// The keys being selected over.
keys: &'a WeightedServiceKeys<K>,
/// Sampling index; position `i` holds the weight of the `i`-th ID in `keys`.
index: WeightedIndex<u32>,
}

// === impl KeyId ===

impl KeyId {
pub(crate) fn new(idx: usize) -> Self {
Self { idx }
}
}

// === impl ServiceKeys ===

// PartialEq, Eq, and Hash are implemented by hand for ServiceKeys: `ids`
// provides a defined iteration order over the entries, which the HashMap
// field on its own does not.
impl<K: PartialEq> PartialEq for ServiceKeys<K> {
    /// Two collections are equal when their ID lists match and every listed ID
    /// resolves to an equal entry (or to no entry) in both maps.
    fn eq(&self, other: &Self) -> bool {
        // The ID comparison short-circuits before any per-entry lookups.
        self.ids == other.ids
            && self
                .ids
                .iter()
                .all(|id| self.keys.get(id) == other.keys.get(id))
    }
}

impl<K: Eq> Eq for ServiceKeys<K> {}

impl<K: Hash> Hash for ServiceKeys<K> {
    /// Feeds the ID list and then each entry, visited in ID order, into `state`.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.ids.hash(state);
        // No separate length is hashed for the entries: `self.ids` and
        // `self.keys` have the same length.
        self.ids
            .iter()
            .map(|id| self.keys.get(id))
            .for_each(|entry| entry.hash(state));
    }
}

impl<K> ServiceKeys<K> {
    /// Builds a collection from `iter`, giving each item an ID equal to its
    /// position in the iteration.
    pub(crate) fn new(iter: impl Iterator<Item = K>) -> Self {
        let mut keys = HashMap::new();
        let ids: Vec<KeyId> = iter
            .enumerate()
            .map(|(idx, key)| {
                let id = KeyId::new(idx);
                keys.insert(id, key);
                id
            })
            .collect();

        Self { ids, keys }
    }

    /// Returns `true` when the collection holds no entries.
    pub(crate) fn is_empty(&self) -> bool {
        self.ids.is_empty()
    }

    /// Returns the number of entries in the collection.
    pub(crate) fn len(&self) -> usize {
        self.ids.len()
    }

    /// Looks up the key `K` stored under the given [`KeyId`].
    ///
    /// Passing a [`KeyId`] that was produced by a different [`ServiceKeys`]
    /// instance gives an unspecified result and is likely to panic.
    ///
    /// # Panics
    ///
    /// Panics if no entry is stored under `id`.
    pub(crate) fn get(&self, id: KeyId) -> &K {
        let entry = self.keys.get(&id);
        entry.expect("distribution lookup keys must be valid")
    }

    /// Returns the ID at position `idx` in declaration order, or `None` when
    /// `idx` is out of range.
    fn try_get_id(&self, idx: usize) -> Option<KeyId> {
        let id = self.ids.get(idx)?;
        Some(*id)
    }

    /// Iterates over the IDs in the order their keys were supplied.
    pub(crate) fn iter(&self) -> impl Iterator<Item = &KeyId> {
        self.ids.iter()
    }
}

// === impl WeightedServiceKeys ===

impl<K> WeightedServiceKeys<K> {
    /// Discards the weights, keeping every key under its existing ID.
    pub(crate) fn into_unweighted(self) -> ServiceKeys<K> {
        let Self { ids, keys } = self;
        let keys = keys
            .into_iter()
            .map(|(id, weighted)| (id, weighted.key))
            .collect();

        ServiceKeys { ids, keys }
    }

    /// Builds a sampling index from the weights, taken in ID order so that
    /// position `i` of the index corresponds to the `i`-th ID.
    ///
    /// # Errors
    ///
    /// Returns whatever error `WeightedIndex::new` reports for these weights.
    pub(crate) fn weighted_index(&self) -> Result<WeightedIndex<u32>, WeightedError> {
        let weights = self.iter().map(|id| self.get(*id).weight);
        WeightedIndex::new(weights)
    }

    /// Checks that a sampling index can be built from the weights, discarding
    /// the index itself.
    pub(crate) fn validate_weights(&self) -> Result<(), WeightedError> {
        self.weighted_index().map(|_index| ())
    }

    /// Creates a selector that samples IDs from these keys.
    ///
    /// # Panics
    ///
    /// Panics if the weights do not form a valid sampling index.
    pub(crate) fn selector(&self) -> WeightedKeySelector<'_, K> {
        WeightedKeySelector {
            index: self.weighted_index().expect("distribution must be valid"),
            keys: self,
        }
    }
}

// === impl WeightedKeySelector ===

impl<K> WeightedKeySelector<'_, K> {
    /// Samples the weighted index with `rng` and returns the ID of the chosen
    /// backend.
    ///
    /// # Panics
    ///
    /// Panics if the sampled position has no corresponding ID. The index is
    /// built with one weight per ID, so this indicates a broken invariant.
    pub(crate) fn select_weighted<R: Rng + ?Sized>(&self, rng: &mut R) -> KeyId {
        let idx = self.index.sample(rng);
        self.keys
            .try_get_id(idx)
            .expect("distribution must select a valid backend")
    }

    /// Sets the weight of the backend identified by `id` to zero in this
    /// selector's index. The borrowed keys are left untouched.
    ///
    /// # Errors
    ///
    /// Propagates the error from `WeightedIndex::update_weights`.
    /// NOTE(review): per the rand documentation this is expected when zeroing
    /// the entry would leave no positive weight; confirm against the pinned
    /// rand version.
    pub(crate) fn disable_backend(&mut self, id: KeyId) -> Result<(), WeightedError> {
        // `id.idx` doubles as the entry's position in the weighted index.
        self.index.update_weights(&[(id.idx, &0)])
    }
}
4 changes: 3 additions & 1 deletion linkerd/distribute/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
#![forbid(unsafe_code)]

mod cache;
mod keys;
mod params;
mod service;
mod stack;

pub use self::{
cache::{BackendCache, NewBackendCache},
params::{Backends, Distribution, WeightedKeys},
keys::WeightedServiceKeys,
params::{Backends, Distribution},
service::Distribute,
stack::NewDistribute,
};
61 changes: 21 additions & 40 deletions linkerd/distribute/src/params.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
use crate::{
keys::{ServiceKeys, WeightedKey},
WeightedServiceKeys,
};
use ahash::AHashSet;
use rand::distributions::{WeightedError, WeightedIndex};
use rand::distributions::WeightedError;
use std::{fmt::Debug, hash::Hash, sync::Arc};

#[derive(Debug, Clone, PartialEq, Eq)]
Expand All @@ -16,17 +20,11 @@ pub enum Distribution<K> {
Empty,

/// A distribution that uses the first available backend in an ordered list.
FirstAvailable(Arc<[K]>),
FirstAvailable(Arc<ServiceKeys<K>>),

/// A distribution that uses the first available backend when randomly
/// selecting over a weighted distribution of backends.
RandomAvailable(Arc<WeightedKeys<K>>),
}

#[derive(Debug, PartialEq, Eq, Hash)]
pub struct WeightedKeys<K> {
keys: Vec<K>,
weights: Vec<u32>,
RandomAvailable(Arc<WeightedServiceKeys<K>>),
}

// === impl Backends ===
Expand Down Expand Up @@ -64,46 +62,29 @@ impl<K> Default for Distribution<K> {
}

impl<K> Distribution<K> {
pub fn first_available(keys: impl IntoIterator<Item = K>) -> Self {
let keys: Arc<[K]> = keys.into_iter().collect();
pub fn first_available(iter: impl IntoIterator<Item = K>) -> Self {
let keys = ServiceKeys::new(iter.into_iter());
if keys.is_empty() {
return Self::Empty;
}
Self::FirstAvailable(keys)

Self::FirstAvailable(Arc::new(keys))
}

pub fn random_available<T: IntoIterator<Item = (K, u32)>>(
iter: T,
) -> Result<Self, WeightedError> {
let (keys, weights): (Vec<_>, Vec<_>) = iter.into_iter().filter(|(_, w)| *w > 0).unzip();
if keys.len() < 2 {
return Ok(Self::first_available(keys));
}
// Error if the distribution is invalid.
let _index = WeightedIndex::new(weights.iter().copied())?;
Ok(Self::RandomAvailable(Arc::new(WeightedKeys {
keys,
weights,
})))
}

pub(crate) fn keys(&self) -> &[K] {
match self {
Self::Empty => &[],
Self::FirstAvailable(keys) => keys,
Self::RandomAvailable(keys) => keys.keys(),
let weighted_keys = WeightedServiceKeys::new(
iter.into_iter()
.map(|(key, weight)| WeightedKey { key, weight }),
);
if weighted_keys.len() < 2 {
return Ok(Self::FirstAvailable(Arc::new(
weighted_keys.into_unweighted(),
)));
}
}
}

// === impl WeightedKeys ===

impl<K> WeightedKeys<K> {
pub(crate) fn keys(&self) -> &[K] {
&self.keys
}

pub(crate) fn index(&self) -> WeightedIndex<u32> {
WeightedIndex::new(self.weights.iter().copied()).expect("distribution must be valid")
weighted_keys.validate_weights()?;
Ok(Self::RandomAvailable(Arc::new(weighted_keys)))
}
}
Loading

0 comments on commit b34d32b

Please sign in to comment.