diff --git a/swiftide/src/transformers/openai_embed.rs b/swiftide/src/transformers/openai_embed.rs index f5b76017..ea7e8453 100644 --- a/swiftide/src/transformers/openai_embed.rs +++ b/swiftide/src/transformers/openai_embed.rs @@ -9,12 +9,25 @@ use anyhow::Result; use async_trait::async_trait; use futures_util::{stream, StreamExt}; +/// A transformer that uses the OpenAI API to generate embeddings for data. +/// +/// `OpenAIEmbed` holds a shared OpenAI client and implements the `BatchableTransformer` trait. +/// Its primary purpose is to embed batches of nodes using the OpenAI API. #[derive(Debug)] pub struct OpenAIEmbed { client: Arc, } impl OpenAIEmbed { + /// Creates a new instance of `OpenAIEmbed`. + /// + /// # Parameters + /// + /// * `client` - An instance of the OpenAI client. + /// + /// # Returns + /// + /// A new instance of `OpenAIEmbed`. pub fn new(client: OpenAI) -> Self { Self { client: Arc::new(client), @@ -24,6 +37,19 @@ impl OpenAIEmbed { #[async_trait] impl BatchableTransformer for OpenAIEmbed { + /// Transforms a batch of `IngestionNode` objects by generating embeddings for them. + /// + /// # Parameters + /// + /// * `nodes` - A vector of `IngestionNode` objects to be transformed. + /// + /// # Returns + /// + /// An `IngestionStream` containing the transformed `IngestionNode` objects with their embeddings. + /// + /// # Errors + /// + /// If the embedding process fails, the returned stream carries the error. #[tracing::instrument(skip_all, name = "transformers.openai_embed")] async fn batch_transform(&self, nodes: Vec) -> IngestionStream { // TODO: We should drop chunks that go over the token limit of the EmbedModel