From f20b2fba05c56b9e71e94e5fdd30170232b4c23f Mon Sep 17 00:00:00 2001 From: Sicheng Pan Date: Fri, 6 Dec 2024 11:48:41 -0800 Subject: [PATCH] Update knn orchestrator diagram --- .../worker/src/execution/orchestration/knn.rs | 99 ++++++------------- 1 file changed, 30 insertions(+), 69 deletions(-) diff --git a/rust/worker/src/execution/orchestration/knn.rs b/rust/worker/src/execution/orchestration/knn.rs index dfc533c6690..c57f6d9605e 100644 --- a/rust/worker/src/execution/orchestration/knn.rs +++ b/rust/worker/src/execution/orchestration/knn.rs @@ -27,8 +27,8 @@ use crate::{ use super::knn_filter::{KnnError, KnnFilterOutput, KnnResult}; -/// The `knn` module contains two orchestrator: `KnnFilterOrchestrator` and `KnnOrchestrator`. -/// When used together, they carry out the evaluation of a `.query(...)` query +/// The `KnnOrchestrator` finds the nearest neighbor of a target embedding given the search domain. +/// When used together with `KnnFilterOrchestrator`, they evaluate a `.query(...)` query /// for the user. We breakdown the evaluation into two parts because a `.query(...)` /// is inherently multiple queries sharing the same filter criteria. Thus we first evaluate /// the filter criteria with `KnnFilterOrchestrator`. Then we spawn a `KnnOrchestrator` for each @@ -38,53 +38,23 @@ use super::knn_filter::{KnnError, KnnFilterOutput, KnnResult}; /// /// # Pipeline /// ```text -/// │ -/// │ -/// │ -/// ┌──────────────────────────── │ ───────────────────────────────┐ -/// │ ▼ │ -/// │ ┌────────────┐ KnnFilterOrchestrator │ -/// │ │ │ │ -/// │ ┌───────────┤ on_start ├────────────────┐ │ -/// │ │ │ │ │ │ -/// │ │ └────────────┘ │ │ -/// │ │ │ │ -/// │ ▼ ▼ │ -/// │ ┌────────────────────┐ ┌────────────────────────┐ │ -/// │ │ │ │ │ │ -/// │ │ FetchLogOperator │ │ FetchSegmentOperator │ │ -/// │ │ │ │ │ │ -/// │ └────────┬───────────┘ └────────────────┬───────┘ │ -/// │ │ │ │ -/// │ │ │ │ -/// │ │ ┌─────────────────────────────┐ │ │ -/// │ │ │ │ │ │ -/// │ └────►│ try_start_filter_operator │◄────┘ │ -/// │ │ │ │ -/// │ └────────────┬────────────────┘ │ -/// │ │ │ -/// │ ▼ │ -/// │ ┌───────────────────┐ │ -/// │ │ │ │ -/// │ │ FilterOperator │ │ -/// │ │ │ │ -/// │ └─────────┬─────────┘ │ -/// │ │ │ -/// │ ▼ │ -/// │ ┌──────────────────┐ │ -/// │ │ │ │ -/// │ │ result_channel │ │ -/// │ │ │ │ -/// │ └────────┬─────────┘ │ -/// │ │ │ -/// └──────────────────────────── │ ───────────────────────────────┘ -/// │ -/// │ -/// │ -/// ┌──────────────────────────────────┴─────────────────────────────────────┐ -/// │ │ -/// │ ... One branch per embedding ... │ -/// │ │ +/// │ +/// │ +/// │ +/// │ +/// ▼ +/// ┌───────────────────────┐ +/// │ │ +/// │ KnnFilterOrchestrator │ +/// │ │ +/// └───────────┬───────────┘ +/// │ +/// │ +/// │ +/// ┌──────────────────────────────────┴─────────────────────────────────────┐ +/// │ │ +/// │ ... One branch per embedding ... │ +/// │ │ /// ┌──────────────────── │ ─────────────────────┐ ┌──────────────────── │ ─────────────────────┐ /// │ ▼ │ │ ▼ │ /// │ ┌────────────┐ KnnOrchestrator │ │ ┌────────────┐ KnnOrchestrator │ @@ -129,27 +99,18 @@ use super::knn_filter::{KnnError, KnnFilterOutput, KnnResult}; /// │ └────────┬─────────┘ │ │ └────────┬─────────┘ │ /// │ │ │ │ │ │ /// └──────────────────── │ ─────────────────────┘ └──────────────────── │ ─────────────────────┘ -/// │ │ -/// │ │ -/// │ │ -/// │ ┌────────────────┐ │ -/// │ │ │ │ -/// └──────────────────────────►│ try_join_all │◄──────────────────────────┘ -/// │ │ -/// └───────┬────────┘ -/// │ -/// │ -/// ▼ +/// │ │ +/// │ │ +/// │ │ +/// │ ┌────────────────┐ │ +/// │ │ │ │ +/// └──────────────────────────►│ try_join_all │◄──────────────────────────┘ +/// │ │ +/// └───────┬────────┘ +/// │ +/// │ +/// ▼ /// ``` -/// -/// # State tracking -/// Similar to the `GetOrchestrator`, the `KnnFilterOrchestrator` need to keep track of the outputs from -/// `FetchLogOperator` and `FetchSegmentOperator`. For `KnnOrchestrator`, it needs to track the outputs from -/// `KnnLogOperator` and `KnnHnswOperator`. It invokes `try_start_knn_merge_operator` when it receives outputs -/// from either operators, and if both outputs are present it composes the input for `KnnMergeOperator` and -/// proceeds with execution. The outputs of other operators are directly forwarded without being tracked -/// by the orchestrator. - #[derive(Debug)] pub struct KnnOrchestrator { // Orchestrator parameters