Skip to content

Commit

Permalink
feat(query): Ragas support (#236)
Browse files Browse the repository at this point in the history
Work in progress on support for ragas as per
explodinggradients/ragas#1165 and #232

Add an optional evaluator to a pipeline. Evaluators need to handle
transformation events in the query pipeline. The Ragas evaluator
captures the transformations as per
https://docs.ragas.io/en/latest/howtos/applications/data_preparation.html.

You can find a working notebook here
https://github.com/bosun-ai/swiftide-tutorial/blob/c510788a625215f46575415161659edf26fc1fd5/ragas/notebook.ipynb
with a pipeline using it here
bosun-ai/swiftide-tutorial#1



TODO:
- [x] Test it with Ragas
- [x] Add more tests
  • Loading branch information
timonv authored Aug 29, 2024
1 parent cc7ec08 commit c00b6c8
Show file tree
Hide file tree
Showing 14 changed files with 587 additions and 18 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions swiftide-core/src/indexing_traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,14 @@ pub trait Persist: Debug + Send + Sync {
}
}

/// Allows for passing defaults from the pipeline to the transformer.
///
/// Serves as a marker at minimum: the provided method has an empty default body, so an
/// implementation is not required unless the implementor wants to use the defaults.
// NOTE(review): the previous wording said "Required for batch transformers", which looks
// copied from `WithBatchIndexingDefaults` — confirm which transformers require this trait.
pub trait WithIndexingDefaults {
/// Receives the indexing defaults; the default implementation ignores them.
fn with_indexing_defaults(&mut self, _indexing_defaults: IndexingDefaults) {}
}

/// Allows for passing defaults from the pipeline to the batch transformer.
///
/// Required for batch transformers as at least a marker. The provided method has an empty
/// default body, so an implementation is not required.
pub trait WithBatchIndexingDefaults {
/// Receives the indexing defaults; the default implementation ignores them.
fn with_indexing_defaults(&mut self, _indexing_defaults: IndexingDefaults) {}
}
Expand Down
2 changes: 2 additions & 0 deletions swiftide-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pub mod prompt;
pub use type_aliases::*;

mod metadata;
mod query_evaluation;

/// All traits are available from the root
pub use crate::indexing_traits::*;
Expand All @@ -27,6 +28,7 @@ pub mod indexing {

pub mod querying {
pub use crate::query::*;
pub use crate::query_evaluation::*;
pub use crate::query_stream::*;
pub use crate::query_traits::*;
pub mod search_strategies {
Expand Down
33 changes: 29 additions & 4 deletions swiftide-core/src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,31 @@
//! `states::Pending`: No documents have been retrieved
//! `states::Retrieved`: Documents have been retrieved
//! `states::Answered`: The query has been answered
use derive_builder::Builder;

use crate::Embedding;

type Document = String;

#[derive(Clone, Default)]
/// A query is the main object going through a query pipeline.
///
/// It acts as a state machine, with the following states:
///
/// `states::Pending`: No documents have been retrieved
/// `states::Retrieved`: Documents have been retrieved
/// `states::Answered`: The query has been answered
///
/// A builder is derived for this struct; its setters accept any value that converts
/// into the field type (`setter(into)`).
#[derive(Clone, Default, Builder)]
#[builder(setter(into))]
pub struct Query<State> {
/// The query text the query started with.
original: String,
/// The current query text. When not set on the builder it falls back to a copy of
/// `original`, or to an empty string if `original` was not set either.
#[builder(default = "self.original.clone().unwrap_or_default()")]
current: String,
/// The state the query is in; presumably one of the types in `states`.
state: State,
/// Recorded transformation events. Defaults to an empty list when built.
#[builder(default)]
transformation_history: Vec<TransformationEvent>,

// TODO: How would this work when doing a rollup query?
/// Optional embedding attached to the query. Defaults to `None` when built.
#[builder(default)]
pub embedding: Option<Embedding>,
}

Expand All @@ -32,7 +45,11 @@ impl<T: std::fmt::Debug> std::fmt::Debug for Query<T> {
}
}

impl<T> Query<T> {
impl<T: Clone> Query<T> {
/// Returns a new, default-initialized [`QueryBuilder`] for assembling a `Query`.
pub fn builder() -> QueryBuilder<T> {
    // `QueryBuilder::default()` already yields an owned builder; cloning that
    // temporary only created a second identical value and dropped the first.
    QueryBuilder::default()
}

/// Return the query it started with
pub fn original(&self) -> &str {
&self.original
Expand Down Expand Up @@ -125,17 +142,24 @@ impl Query<states::Answered> {
}
}

/// States of a query
pub mod states {
use super::Builder;
use super::Document;

#[derive(Debug, Default, Clone)]
/// The query is pending and has not been used.
///
/// A unit struct that carries no data; no documents have been retrieved in this state.
pub struct Pending;

#[derive(Debug, Clone)]
#[derive(Debug, Default, Clone, Builder)]
#[builder(setter(into))]
/// Documents have been retrieved.
///
/// Holds the retrieved documents. A builder is derived whose setters accept any value
/// that converts into the field type.
pub struct Retrieved {
/// The documents that were retrieved for the query.
pub(crate) documents: Vec<Document>,
}
#[derive(Debug, Clone)]
#[derive(Debug, Default, Clone, Builder)]
#[builder(setter(into))]
/// The query has been answered.
///
/// Holds the answer text. A builder is derived whose setters accept any value that
/// converts into the field type.
pub struct Answered {
/// The answer to the query.
pub(crate) answer: String,
}
Expand All @@ -154,6 +178,7 @@ impl<T: AsRef<str>> From<T> for Query<states::Pending> {

#[allow(dead_code)]
#[derive(Clone, Debug)]
/// Records changes to a query
pub enum TransformationEvent {
Transformed {
before: String,
Expand Down
40 changes: 40 additions & 0 deletions swiftide-core/src/query_evaluation.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
use crate::querying::{states, Query};

/// Wraps a query for evaluation. Used by the [`EvaluateQuery`] trait.
///
/// Each variant owns the query in the state that corresponds to one pipeline step.
/// Both variants can be created via `From` on the respective query type.
pub enum QueryEvaluation {
/// Retrieve documents: wraps a query in the `Retrieved` state.
RetrieveDocuments(Query<states::Retrieved>),
/// Answer the query: wraps a query in the `Answered` state.
AnswerQuery(Query<states::Answered>),
}

/// Wraps a query in the `Retrieved` state as [`QueryEvaluation::RetrieveDocuments`].
impl From<Query<states::Retrieved>> for QueryEvaluation {
    fn from(retrieved: Query<states::Retrieved>) -> Self {
        Self::RetrieveDocuments(retrieved)
    }
}

/// Wraps a query in the `Answered` state as [`QueryEvaluation::AnswerQuery`].
impl From<Query<states::Answered>> for QueryEvaluation {
    fn from(answered: Query<states::Answered>) -> Self {
        Self::AnswerQuery(answered)
    }
}

// TODO: must be a nicer way, maybe not needed and full encapsulation is better anyway
impl QueryEvaluation {
    /// Consumes the evaluation and returns the wrapped query if this is the
    /// `RetrieveDocuments` variant; returns `None` for any other variant.
    pub fn retrieve_documents_query(self) -> Option<Query<states::Retrieved>> {
        match self {
            Self::RetrieveDocuments(retrieved) => Some(retrieved),
            Self::AnswerQuery(_) => None,
        }
    }

    /// Consumes the evaluation and returns the wrapped query if this is the
    /// `AnswerQuery` variant; returns `None` for any other variant.
    pub fn answer_query(self) -> Option<Query<states::Answered>> {
        match self {
            Self::AnswerQuery(answered) => Some(answered),
            Self::RetrieveDocuments(_) => None,
        }
    }
}
3 changes: 3 additions & 0 deletions swiftide-core/src/query_stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ use pin_project_lite::pin_project;
use crate::querying::Query;

pin_project! {
/// Internally used by a query pipeline
///
/// Has a sender and receiver to initialize the stream
pub struct QueryStream<'stream, Q: 'stream> {
#[pin]
pub(crate) inner: Pin<Box<dyn Stream<Item = Result<Query<Q>>> + Send + 'stream>>,
Expand Down
18 changes: 15 additions & 3 deletions swiftide-core/src/query_traits.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
use anyhow::Result;
use async_trait::async_trait;

use crate::query::{
states::{self, Retrieved},
Query,
use crate::{
query::{
states::{self, Retrieved},
Query,
},
querying::QueryEvaluation,
};

/// Can transform queries before retrieval
Expand All @@ -28,6 +31,7 @@ where
}
}

/// A search strategy for the query pipeline.
///
/// Marker trait without methods; implementors must be `Clone`, `Send`, `Sync` and `Default`.
pub trait SearchStrategy: Clone + Send + Sync + Default {}

/// Can retrieve documents given a SearchStrategy
Expand Down Expand Up @@ -87,3 +91,11 @@ where
(self)(query)
}
}

/// Evaluates a query
///
/// An evaluator needs to be able to respond to each step in the query pipeline
#[async_trait]
pub trait EvaluateQuery: Send + Sync {
/// Handles a single evaluation event, passed by value as a [`QueryEvaluation`].
///
/// Takes `&self`, so an implementor that records state needs interior mutability.
///
/// # Errors
///
/// Returns an error when the implementor fails to process the evaluation.
async fn evaluate(&self, evaluation: QueryEvaluation) -> Result<()>;
}
8 changes: 4 additions & 4 deletions swiftide-core/src/search_strategies.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Search strategies provide a generic way for Retrievers to implement their
//! search in various ways.
//!
//! The strategy is also yielded to the Retriever and can contain additional configuration
/// Search strategies provide a generic way for Retrievers to implement their
/// search in various ways.
///
/// The strategy is also yielded to the Retriever and can contain additional configuration
use crate::querying;

/// A very simple search where it takes the embedding on the current query
Expand Down
2 changes: 2 additions & 0 deletions swiftide-query/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ num_cpus = { workspace = true }
pin-project-lite = { workspace = true }
tracing = { workspace = true }
indoc = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }

# Internal
swiftide-core = { path = "../swiftide-core", version = "0.9.0" }
Expand Down
21 changes: 17 additions & 4 deletions swiftide-query/src/answers/simple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ use swiftide_core::{
Answer,
};

/// Generate an answer based on the current query
///
/// For example, after retrieving documents, and those are summarized,
/// will answer the original question with the current text in the query.
///
/// If `current` on the Query is empty, it will concatenate the documents
/// as context instead.
#[derive(Debug, Clone, Builder)]
pub struct Simple {
#[builder(setter(custom))]
Expand Down Expand Up @@ -49,8 +56,7 @@ impl SimpleBuilder {
}

fn default_prompt() -> PromptTemplate {
indoc::indoc!(
"
indoc::indoc! {"
Answer the following question based on the context provided:
{{ question }}
Expand All @@ -62,8 +68,7 @@ fn default_prompt() -> PromptTemplate {
## Context
{{ context }}
"
)
"}
.into()
}

Expand All @@ -77,6 +82,14 @@ impl Answer for Simple {
query.current()
};

let prompt = self
.prompt_template
.to_prompt()
.with_context_value("question", query.original())
.with_context_value("context", context);

tracing::debug!(prompt = ?prompt, "Prompting from Simple for answer");

let answer = self
.client
.prompt(
Expand Down
6 changes: 6 additions & 0 deletions swiftide-query/src/evaluators/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/*!
This module contains evaluators for evaluating the quality of a pipeline.
Evaluators must implement the [`swiftide_core::querying::EvaluateQuery`] trait.
*/
pub mod ragas;
Loading

0 comments on commit c00b6c8

Please sign in to comment.