Skip to content

Commit

Permalink
Feature to allow linking two extraction graphs
Browse files Browse the repository at this point in the history
Feature to support making an output of an extraction graph an input to
another extraction graph. All top level policies (with content source
ingestion) of the linked graph are applied to the specified
graph/content_source combination.
  • Loading branch information
maxkozlovsky committed Jun 27, 2024
1 parent 5849644 commit b94cbe9
Show file tree
Hide file tree
Showing 13 changed files with 488 additions and 36 deletions.
31 changes: 30 additions & 1 deletion crates/indexify_internal_api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,35 @@ impl ExtractionGraphBuilder {
}
}

/// Identifies a node of an extraction graph: a content source inside a named
/// graph within a namespace.
#[derive(Debug, Clone, Serialize, PartialEq, Eq, Deserialize, Hash)]
pub struct ExtractionGraphNode {
/// Namespace the graph lives in.
pub namespace: String,
/// Name of the extraction graph that contains the node.
pub graph_name: String,
/// Content source that selects the node inside the graph.
pub source: ContentSource,
}

/// Links a node in an extraction graph to another graph.
/// All top-level policies in the linked graph will be applied to the
/// specified node.
#[derive(Debug, Clone, Serialize, PartialEq, Eq, Deserialize)]
pub struct ExtractionGraphLink {
/// The source node (namespace, graph and content source) being linked.
pub node: ExtractionGraphNode,
/// Name of the linked graph whose top-level policies are applied to `node`.
pub graph_name: String,
}

/// Converts the coordinator's wire-level link request into the internal
/// [`ExtractionGraphLink`] representation.
impl From<indexify_coordinator::LinkExtractionGraphsRequest> for ExtractionGraphLink {
    fn from(value: indexify_coordinator::LinkExtractionGraphsRequest) -> Self {
        // Take the request apart once so each field is moved exactly where it belongs.
        let indexify_coordinator::LinkExtractionGraphsRequest {
            namespace,
            source_graph_name,
            content_source,
            linked_graph_name,
        } = value;

        // The source side of the link: namespace + source graph + content source.
        let node = ExtractionGraphNode {
            namespace,
            graph_name: source_graph_name,
            source: content_source.into(),
        };

        Self {
            node,
            graph_name: linked_graph_name,
        }
    }
}

/// Alias for an index name; a plain `String`.
pub type IndexName = String;
/// Alias for an index identifier; a plain `String`.
pub type IndexId = String;

Expand Down Expand Up @@ -794,7 +823,7 @@ impl Default for ContentMetadataId {
}
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum ContentSource {
Ingestion,
ExtractionPolicyName(ExtractionPolicyName),
Expand Down
102 changes: 102 additions & 0 deletions crates/indexify_proto/src/indexify_coordinator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -834,6 +834,21 @@ pub struct ExecutorsHeartbeatRequest {
/// Empty prost message returned by the `ExecutorsHeartbeat` RPC.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct ExecutorsHeartbeatResponse {}
/// Prost message carrying the parameters of the `LinkExtractionGraphs` RPC.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct LinkExtractionGraphsRequest {
/// Namespace of the source graph node.
#[prost(string, tag = "1")]
pub namespace: ::prost::alloc::string::String,
/// Name of the graph that contains the source node.
#[prost(string, tag = "2")]
pub source_graph_name: ::prost::alloc::string::String,
/// Content source of the source node, encoded as a string.
#[prost(string, tag = "3")]
pub content_source: ::prost::alloc::string::String,
/// Name of the graph being linked to the source node.
#[prost(string, tag = "4")]
pub linked_graph_name: ::prost::alloc::string::String,
}
/// Empty prost message returned by the `LinkExtractionGraphs` RPC.
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct LinkExtractionGraphsResponse {}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
#[repr(i32)]
pub enum TaskOutcome {
Expand Down Expand Up @@ -2075,6 +2090,36 @@ pub mod coordinator_service_client {
);
self.inner.unary(req, path, codec).await
}
/// Invokes the unary `LinkExtractionGraphs` RPC on the coordinator service.
///
/// # Errors
/// Returns a `tonic::Status` with code `Unknown` when the underlying service
/// reports that it is not ready; otherwise returns the outcome of the unary
/// call.
pub async fn link_extraction_graphs(
    &mut self,
    request: impl tonic::IntoRequest<super::LinkExtractionGraphsRequest>,
) -> std::result::Result<
    tonic::Response<super::LinkExtractionGraphsResponse>,
    tonic::Status,
> {
    // Bail out early if the inner service cannot accept a request.
    if let Err(e) = self.inner.ready().await {
        return Err(tonic::Status::new(
            tonic::Code::Unknown,
            format!("Service was not ready: {}", e.into()),
        ));
    }

    // Tag the request with the fully qualified service and method names.
    let mut req = request.into_request();
    req.extensions_mut().insert(GrpcMethod::new(
        "indexify_coordinator.CoordinatorService",
        "LinkExtractionGraphs",
    ));

    let path = http::uri::PathAndQuery::from_static(
        "/indexify_coordinator.CoordinatorService/LinkExtractionGraphs",
    );
    let codec = tonic::codec::ProstCodec::default();
    self.inner.unary(req, path, codec).await
}
}
}
/// Generated server implementations.
Expand Down Expand Up @@ -2332,6 +2377,13 @@ pub mod coordinator_service_server {
tonic::Response<super::ExecutorsHeartbeatResponse>,
tonic::Status,
>;
/// Handles the `LinkExtractionGraphs` RPC: receives a
/// `LinkExtractionGraphsRequest` and answers with a
/// `LinkExtractionGraphsResponse` or a `tonic::Status` error.
async fn link_extraction_graphs(
&self,
request: tonic::Request<super::LinkExtractionGraphsRequest>,
) -> std::result::Result<
tonic::Response<super::LinkExtractionGraphsResponse>,
tonic::Status,
>;
}
#[derive(Debug)]
pub struct CoordinatorServiceServer<T: CoordinatorService> {
Expand Down Expand Up @@ -4113,6 +4165,56 @@ pub mod coordinator_service_server {
};
Box::pin(fut)
}
"/indexify_coordinator.CoordinatorService/LinkExtractionGraphs" => {
// Adaptor that exposes `CoordinatorService::link_extraction_graphs` as a
// tonic `UnaryService` so the generic unary machinery can drive it.
#[allow(non_camel_case_types)]
struct LinkExtractionGraphsSvc<T: CoordinatorService>(pub Arc<T>);
impl<
T: CoordinatorService,
> tonic::server::UnaryService<super::LinkExtractionGraphsRequest>
for LinkExtractionGraphsSvc<T> {
type Response = super::LinkExtractionGraphsResponse;
type Future = BoxFuture<
tonic::Response<Self::Response>,
tonic::Status,
>;
fn call(
&mut self,
request: tonic::Request<super::LinkExtractionGraphsRequest>,
) -> Self::Future {
// Clone the Arc so the returned future owns its own handle to the service.
let inner = Arc::clone(&self.0);
let fut = async move {
<T as CoordinatorService>::link_extraction_graphs(
&inner,
request,
)
.await
};
Box::pin(fut)
}
}
// Copy the server's compression and message-size settings into locals
// before building the `async move` future below.
let accept_compression_encodings = self.accept_compression_encodings;
let send_compression_encodings = self.send_compression_encodings;
let max_decoding_message_size = self.max_decoding_message_size;
let max_encoding_message_size = self.max_encoding_message_size;
let inner = self.inner.clone();
let fut = async move {
let inner = inner.0;
let method = LinkExtractionGraphsSvc(inner);
let codec = tonic::codec::ProstCodec::default();
// Configure the gRPC handler with the settings captured above.
let mut grpc = tonic::server::Grpc::new(codec)
.apply_compression_config(
accept_compression_encodings,
send_compression_encodings,
)
.apply_max_message_size_config(
max_decoding_message_size,
max_encoding_message_size,
);
// Run the unary call through the adaptor and hand back its response.
let res = grpc.unary(method, req).await;
Ok(res)
};
Box::pin(fut)
}
_ => {
Box::pin(async move {
Ok(
Expand Down
11 changes: 11 additions & 0 deletions protos/coordinator_service.proto
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ service CoordinatorService {
rpc UpdateLabels(UpdateLabelsRequest) returns (UpdateLabelsResponse) {}

rpc ExecutorsHeartbeat(ExecutorsHeartbeatRequest) returns (ExecutorsHeartbeatResponse) {}

rpc LinkExtractionGraphs(LinkExtractionGraphsRequest) returns (LinkExtractionGraphsResponse) {}
}

message GetContentMetadataRequest {
Expand Down Expand Up @@ -575,3 +577,12 @@ message ExecutorsHeartbeatRequest {
}

message ExecutorsHeartbeatResponse {}

// Request to link a node of one extraction graph to another graph.
message LinkExtractionGraphsRequest {
// Namespace of the source graph node.
string namespace = 1;
// Name of the graph that contains the source node.
string source_graph_name = 2;
// Content source of the source node, encoded as a string.
string content_source = 3;
// Name of the graph being linked to the source node.
string linked_graph_name = 4;
}

// Empty response of the LinkExtractionGraphs RPC.
message LinkExtractionGraphsResponse {}
7 changes: 7 additions & 0 deletions src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ use utoipa::{IntoParams, ToSchema};

use crate::{api_utils, metadata_storage, vectordbs};

/// API-level description of a link between two extraction graphs.
///
/// Carries the same three graph fields as the coordinator's
/// `LinkExtractionGraphsRequest`, but no namespace.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ExtractionGraphLink {
/// Name of the graph that contains the source node.
pub source_graph_name: String,
/// Content source of the source node, as a string.
pub content_source: String,
/// Name of the graph being linked to the source node.
pub linked_graph_name: String,
}

#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ExtractionGraph {
pub id: String,
Expand Down
106 changes: 104 additions & 2 deletions src/coordinator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::{
};

use anyhow::{anyhow, Result};
use indexify_internal_api::{self as internal_api};
use indexify_internal_api::{self as internal_api, ExtractionGraphLink};
use indexify_proto::indexify_coordinator::{self, CreateContentStatus};
use internal_api::{
ContentMetadataId,
Expand Down Expand Up @@ -70,6 +70,10 @@ impl Coordinator {
})
}

/// Links an extraction graph node to another graph by delegating to the
/// shared state.
///
/// # Errors
/// Propagates any error returned by `shared_state.link_graphs`.
pub async fn link_graphs(&self, link: ExtractionGraphLink) -> Result<()> {
self.shared_state.link_graphs(link).await
}

/// Locks `my_executors` and returns the guard over its `HashSet<String>`.
///
/// # Panics
/// Panics if the mutex is poisoned, because the lock result is unwrapped.
pub fn get_locked_my_executors(&self) -> std::sync::MutexGuard<HashSet<String>> {
self.my_executors.lock().unwrap()
}
Expand Down Expand Up @@ -751,7 +755,7 @@ impl Coordinator {
mod tests {
use std::{collections::HashMap, fs, sync::Arc, time::Duration, vec};

use indexify_internal_api as internal_api;
use indexify_internal_api::{self as internal_api, ExtractionGraphLink, ExtractionGraphNode};
use indexify_proto::indexify_coordinator::CreateContentStatus;
use internal_api::{ContentMetadataId, ContentSource, TaskOutcome};

Expand Down Expand Up @@ -1815,6 +1819,104 @@ mod tests {
Ok(())
}

/// Verifies that content flows through linked extraction graphs.
///
/// Three graphs are created, each with one root policy and two child
/// policies. The output of graph 1's second policy is linked to graph 2 and
/// the output of its third policy to graph 3. One piece of content is then
/// ingested into graph 1 and every task is run to completion.
#[tokio::test]
async fn test_link_graphs() -> Result<(), anyhow::Error> {
    let (coordinator, _) = setup_coordinator().await;

    coordinator.create_namespace(DEFAULT_TEST_NAMESPACE).await?;

    // Use a single executor id for both registration and task execution so
    // the tasks are completed by the executor that was actually registered.
    let executor_id = "test_executor_id_1";
    coordinator
        .register_executor("localhost:8956", executor_id, vec![mock_extractor()])
        .await?;

    // Source graph: policy 0 is the root, policies 1 and 2 are its children.
    let eg_1 = create_test_extraction_graph_with_children(
        "test_extraction_graph_1",
        vec![
            "test_extraction_policy_1",
            "test_extraction_policy_2",
            "test_extraction_policy_3",
        ],
        &[Root, Child(0), Child(0)],
    );
    coordinator.create_extraction_graph(eg_1.clone()).await?;
    coordinator.run_scheduler().await?;

    // First linked graph.
    let eg_2 = create_test_extraction_graph_with_children(
        "test_extraction_graph_2",
        vec![
            "test_extraction_policy_4",
            "test_extraction_policy_5",
            "test_extraction_policy_6",
        ],
        &[Root, Child(0), Child(0)],
    );
    coordinator.create_extraction_graph(eg_2.clone()).await?;
    coordinator.run_scheduler().await?;

    // Second linked graph.
    let eg_3 = create_test_extraction_graph_with_children(
        "test_extraction_graph_3",
        vec![
            "test_extraction_policy_7",
            "test_extraction_policy_8",
            "test_extraction_policy_9",
        ],
        &[Root, Child(0), Child(0)],
    );
    coordinator.create_extraction_graph(eg_3.clone()).await?;
    coordinator.run_scheduler().await?;

    // Output of graph 1's second policy feeds graph 2.
    let link = ExtractionGraphLink {
        node: ExtractionGraphNode {
            namespace: DEFAULT_TEST_NAMESPACE.to_string(),
            graph_name: eg_1.name.clone(),
            source: ContentSource::ExtractionPolicyName(
                eg_1.extraction_policies[1].name.clone(),
            ),
        },
        graph_name: eg_2.name.clone(),
    };
    coordinator.link_graphs(link).await?;

    // Output of graph 1's third policy feeds graph 3.
    let link = ExtractionGraphLink {
        node: ExtractionGraphNode {
            namespace: DEFAULT_TEST_NAMESPACE.to_string(),
            graph_name: eg_1.name.clone(),
            source: ContentSource::ExtractionPolicyName(
                eg_1.extraction_policies[2].name.clone(),
            ),
        },
        graph_name: eg_3.name.clone(),
    };
    coordinator.link_graphs(link).await?;

    // Ingest one root content item into graph 1.
    let parent_content = test_mock_content_metadata("test_parent_id", "", &eg_1.name);
    let create_res = coordinator
        .create_content_metadata(vec![parent_content.clone()])
        .await?;
    assert_eq!(create_res.len(), 1);
    assert_eq!(*create_res.first().unwrap(), CreateContentStatus::Created);
    coordinator.run_scheduler().await?;

    // Only the root policy of graph 1 should have a task at this point.
    let all_tasks = coordinator.shared_state.list_all_unfinished_tasks().await?;
    assert_eq!(all_tasks.len(), 1);

    let mut child_id = 1;
    perform_all_tasks(&coordinator, executor_id, &mut child_id).await?;

    // Expected size of the content tree once all three graphs have run.
    // NOTE(review): presumably the ingested root plus one output for each of
    // the nine policies — confirm against `perform_all_tasks`.
    let tree = coordinator
        .shared_state
        .get_content_tree_metadata(&parent_content.id.id)?;
    assert_eq!(tree.len(), 10);

    Ok(())
}

#[tokio::test]
#[tracing_test::traced_test]
async fn test_policy_filters() -> Result<(), anyhow::Error> {
Expand Down
17 changes: 16 additions & 1 deletion src/coordinator_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use anyhow::{anyhow, Result};
use axum::{extract::State, routing::get};
use futures::StreamExt;
use hyper::StatusCode;
use indexify_internal_api::{self as internal_api, ContentSourceFilter};
use indexify_internal_api::{self as internal_api, ContentSourceFilter, ExtractionGraphLink};
use indexify_proto::indexify_coordinator::{
self,
coordinator_service_server::CoordinatorService,
Expand Down Expand Up @@ -213,6 +213,21 @@ impl CoordinatorService for CoordinatorServiceServer {
type GCTasksStreamStream = GCTasksResponseStream;
type HeartbeatStream = HBResponseStream;

async fn link_extraction_graphs(
&self,
request: tonic::Request<indexify_coordinator::LinkExtractionGraphsRequest>,
) -> Result<tonic::Response<indexify_coordinator::LinkExtractionGraphsResponse>, tonic::Status>
{
let link: ExtractionGraphLink = request.into_inner().into();
self.coordinator
.link_graphs(link)
.await
.map_err(|e| tonic::Status::aborted(e.to_string()))?;
Ok(tonic::Response::new(
indexify_coordinator::LinkExtractionGraphsResponse {},
))
}

async fn executors_heartbeat(
&self,
request: tonic::Request<ExecutorsHeartbeatRequest>,
Expand Down
Loading

0 comments on commit b94cbe9

Please sign in to comment.