-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(query): Add support for filters in SimilaritySingleEmbedding (#298)
Adds support for filters for Qdrant and Lancedb in SimilaritySingleEmbedding. Also fixes several small bugs and brings improved tests.
- Loading branch information
Showing
16 changed files
with
460 additions
and
73 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
//! Search strategies provide a generic way for Retrievers to implement their | ||
//! search in various ways. | ||
//! | ||
//! The strategy is also yielded to the Retriever and can contain additional configuration | ||
mod hybrid_search; | ||
mod similarity_single_embedding; | ||
|
||
/// Default maximum number of documents a search strategy returns (`top_k`).
pub(crate) const DEFAULT_TOP_K: u64 = 10;
/// Default value for `top_n`.
// NOTE(review): not referenced in the visible code; presumably consumed by the
// hybrid search strategy -- confirm against `hybrid_search`.
pub(crate) const DEFAULT_TOP_N: u64 = 10;
|
||
pub use hybrid_search::*; | ||
pub use similarity_single_embedding::*; | ||
|
||
/// Marker trait for types that can be carried by a search strategy as its filter.
///
/// The trait has no methods; it only requires that a filter is `Clone`, `Sync`
/// and `Send`. Which concrete type is appropriate depends on the storage
/// backend (see the implementations below).
pub trait SearchFilter: Clone + Sync + Send {}

// Qdrant uses its own filter type; only compiled with the `qdrant` feature
#[cfg(feature = "qdrant")]
impl SearchFilter for qdrant_client::qdrant::Filter {}

// When no filters are applied
impl SearchFilter for () {}

// Lancedb uses a string filter
impl SearchFilter for String {}
72 changes: 72 additions & 0 deletions
72
swiftide-core/src/search_strategies/similarity_single_embedding.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
use crate::querying; | ||
|
||
use super::{SearchFilter, DEFAULT_TOP_K}; | ||
|
||
/// A simple, single vector similarity search where it takes the embedding on the current query | ||
/// and returns `top_k` documents. | ||
/// | ||
/// Can optionally be used with a filter. | ||
#[derive(Debug, Clone)] | ||
pub struct SimilaritySingleEmbedding<FILTER: SearchFilter = ()> { | ||
/// Maximum number of documents to return | ||
top_k: u64, | ||
|
||
filter: Option<FILTER>, | ||
} | ||
|
||
impl<FILTER: SearchFilter> querying::SearchStrategy for SimilaritySingleEmbedding<FILTER> {} | ||
|
||
impl<FILTER: SearchFilter> Default for SimilaritySingleEmbedding<FILTER> { | ||
fn default() -> Self { | ||
Self { | ||
top_k: DEFAULT_TOP_K, | ||
filter: None, | ||
} | ||
} | ||
} | ||
|
||
impl SimilaritySingleEmbedding<()> { | ||
/// Set an optional filter to be used in the query | ||
pub fn into_concrete_filter<FILTER: SearchFilter>(&self) -> SimilaritySingleEmbedding<FILTER> { | ||
SimilaritySingleEmbedding::<FILTER> { | ||
top_k: self.top_k, | ||
filter: None, | ||
} | ||
} | ||
} | ||
|
||
impl<FILTER: SearchFilter> SimilaritySingleEmbedding<FILTER> { | ||
pub fn from_filter(filter: FILTER) -> Self { | ||
Self { | ||
filter: Some(filter), | ||
..Default::default() | ||
} | ||
} | ||
|
||
/// Set the maximum amount of documents to be returned | ||
pub fn with_top_k(&mut self, top_k: u64) -> &mut Self { | ||
self.top_k = top_k; | ||
|
||
self | ||
} | ||
|
||
/// Returns the maximum of documents to be returned | ||
pub fn top_k(&self) -> u64 { | ||
self.top_k | ||
} | ||
|
||
/// Set an optional filter to be used in the query | ||
pub fn with_filter<NEWFILTER: SearchFilter>( | ||
self, | ||
filter: NEWFILTER, | ||
) -> SimilaritySingleEmbedding<NEWFILTER> { | ||
SimilaritySingleEmbedding::<NEWFILTER> { | ||
top_k: self.top_k, | ||
filter: Some(filter), | ||
} | ||
} | ||
|
||
pub fn filter(&self) -> &Option<FILTER> { | ||
&self.filter | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.