Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(indexing): Support Redb as embeddable nodecache #346

Merged
merged 10 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 26 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ chrono = { version = "0.4" }
indoc = { version = "2.0" }
regex = { version = "1.10.6" }
uuid = { version = "1.10", features = ["v3", "v4", "serde"] }
dyn-clone = { version = "1.0" }

# Integrations
spider = { version = "2.2" }
Expand All @@ -59,6 +60,7 @@ lancedb = { version = "0.10", default-features = false }
arrow-array = { version = "52.0", default-features = false }
arrow = { version = "52.2" }
parquet = { version = "52.2", default-features = false, features = ["async"] }
redb = { version = "2.1" }

# Testing
test-log = "0.2.16"
Expand Down
13 changes: 12 additions & 1 deletion benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ homepage.workspace = true

[dev-dependencies]
tokio = { workspace = true, features = ["full"] }
swiftide = { path = "../swiftide", features = ["all"] }
swiftide = { path = "../swiftide", features = ["all", "redb", "test-utils"] }
swiftide-test-utils = { path = "../swiftide-test-utils", features = [
"test-utils",
] }
tracing-subscriber = "0.3"
serde_json = { workspace = true }
criterion = { version = "0.5.1", features = [
Expand All @@ -22,6 +25,8 @@ criterion = { version = "0.5.1", features = [
], default-features = false }
anyhow = { workspace = true }
futures-util = { workspace = true }
testcontainers = { workspace = true, features = ["blocking"] }
temp-dir = { workspace = true }

[[bench]]
name = "fileloader"
Expand All @@ -32,3 +37,9 @@ harness = false
name = "index-readme-local"
path = "local_pipeline.rs"
harness = false


[[bench]]
name = "node-cache"
path = "node_cache_comparison.rs"
harness = false
74 changes: 74 additions & 0 deletions benchmarks/node_cache_comparison.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
use anyhow::Result;
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use swiftide::indexing::transformers::ChunkCode;
use swiftide::{
indexing::{loaders::FileLoader, persist::MemoryStorage, Pipeline},
traits::NodeCache,
};
use temp_dir::TempDir;
use testcontainers::Container;
use testcontainers::{
core::{IntoContainerPort, WaitFor},
runners::SyncRunner,
GenericImage,
};

/// Runs one indexing pass over the `.rs` files under the current directory.
///
/// The pipeline filters nodes through `node_cache`, chunks them as Rust code
/// into chunks of 10..256, and stores the result in an in-memory storage.
///
/// # Errors
///
/// Returns an error if the code chunker cannot be constructed or if the
/// pipeline run itself fails.
async fn run_pipeline(node_cache: Box<dyn NodeCache>) -> Result<()> {
    let loader = FileLoader::new(".").with_extensions(&["rs"]);
    let cached = Pipeline::from_loader(loader).filter_cached(node_cache);

    // Built after the cache filter is attached, matching the original order.
    let chunker = ChunkCode::try_for_language_and_chunk_size("rust", 10..256)?;
    let storage = MemoryStorage::default();

    cached
        .then_chunk(chunker)
        .then_store_with(storage)
        .run()
        .await
}

/// Benchmarks the indexing pipeline with Redis and Redb as node caches.
///
/// A Redis container is started for the duration of the benchmark and Redb
/// is backed by a file inside a temporary directory. Each cache is cleared
/// once before its measurement starts; the measured iterations then share
/// that cache.
///
/// # Panics
///
/// Panics if the container, either cache, or the Tokio runtime cannot be set
/// up, or if clearing a cache fails.
fn criterion_benchmark(c: &mut Criterion) {
    // Keep the binding alive: dropping the container stops Redis.
    let redis_container = start_redis();

    let redis_url = format!(
        "redis://{host}:{port}",
        host = redis_container.get_host().unwrap(),
        port = redis_container.get_host_port_ipv4(6379).unwrap()
    );

    let redis: Box<dyn NodeCache> = Box::new(
        swiftide::integrations::redis::Redis::try_from_url(redis_url, "criterion").unwrap(),
    );

    // Keep the binding alive: the directory is removed when `tempdir` drops.
    let tempdir = TempDir::new().unwrap();
    let redb: Box<dyn NodeCache> = Box::new(
        swiftide::integrations::redb::Redb::builder()
            .database_path(tempdir.child("criterion"))
            .build()
            .unwrap(),
    );

    let runtime = tokio::runtime::Builder::new_multi_thread()
        .enable_all()
        .build()
        .unwrap();

    for (node_cache, name) in [(redis, "redis"), (redb, "redb")] {
        c.bench_with_input(
            BenchmarkId::new("node_cache", name),
            &node_cache,
            |b, cache| {
                // Drive the clear to completion before measuring. Wrapping the
                // async block in `spawn_blocking` only constructs the future
                // and never polls it, so the cache would not be cleared.
                runtime.block_on(async { cache.clear().await.unwrap() });

                b.to_async(&runtime).iter(|| run_pipeline(cache.clone()));
            },
        );
    }
}

/// Starts a `redis:7.2.4` container with port 6379 exposed and returns its
/// handle once "Ready to accept connections" has appeared on stdout.
///
/// # Panics
///
/// Panics if the container cannot be started.
fn start_redis() -> Container<GenericImage> {
    let image = GenericImage::new("redis", "7.2.4");
    let image = image.with_exposed_port(6379.tcp());
    let image = image.with_wait_for(WaitFor::message_on_stdout("Ready to accept connections"));

    image.start().expect("Redis started")
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
1 change: 1 addition & 0 deletions swiftide-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ strum_macros = { workspace = true }
mockall = { workspace = true, optional = true }
lazy_static = { workspace = true }
derive_builder = { workspace = true }
dyn-clone = { workspace = true }

tera = { version = "1.20", default-features = false }
uuid = { workspace = true, features = ["v4", "v3"] }
Expand Down
38 changes: 35 additions & 3 deletions swiftide-core/src/indexing_traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@ use crate::prompt::Prompt;
use anyhow::Result;
use async_trait::async_trait;

use dyn_clone::DynClone;
/// All traits are easily mockable under tests
#[cfg(feature = "test-utils")]
#[doc(hidden)]
use mockall::{automock, predicate::str};
use mockall::{automock, mock, predicate::str};

#[cfg_attr(feature = "test-utils", automock)]
#[async_trait]
Expand Down Expand Up @@ -216,22 +217,47 @@ impl ChunkerTransformer for &dyn ChunkerTransformer {
}
}

#[cfg_attr(feature = "test-utils", automock)]
// #[cfg_attr(feature = "test-utils", automock)]
#[async_trait]
/// Caches nodes, typically by their path and hash
/// Recommended to namespace on the storage
///
/// For now just bool return value for easy filter
pub trait NodeCache: Send + Sync + Debug {
pub trait NodeCache: Send + Sync + Debug + DynClone {
/// Looks up `node` in the cache and reports the result as a `bool`.
///
/// NOTE(review): presumably `true` means the node is already cached and
/// can be filtered out; confirm against the implementors.
async fn get(&self, node: &Node) -> bool;
/// Records `node` in the cache. No result is reported to the caller.
async fn set(&self, node: &Node);

/// Optionally provide a method to clear the cache
///
/// # Panics
///
/// The default implementation panics via `unimplemented!`; implementors
/// that support clearing must override it.
async fn clear(&self) -> Result<()> {
unimplemented!("Clear not implemented")
}

/// Returns a short name for this cache: the last `::`-separated segment of
/// the implementing type's `std::any::type_name`.
fn name(&self) -> &'static str {
let name = std::any::type_name::<Self>();
// Keep only the final path segment, e.g. `a::b::Cache` becomes `Cache`.
name.split("::").last().unwrap_or(name)
}
}

dyn_clone::clone_trait_object!(NodeCache);

// Hand-written mockall mock for `NodeCache`, compiled only with the
// `test-utils` feature. `mock!` names the generated type `MockNodeCache`.
// It is spelled out with `mock!` so that a `Clone` impl can be mocked
// alongside the trait methods.
#[cfg(feature = "test-utils")]
mock! {
#[derive(Debug)]
pub NodeCache {}

// Mocked trait methods. `name` is not listed here.
// NOTE(review): presumably the trait's default `name` is used; confirm.
#[async_trait]
impl NodeCache for NodeCache {
async fn get(&self, node: &Node) -> bool;
async fn set(&self, node: &Node);
async fn clear(&self) -> Result<()>;

}

// `clone` is itself a mocked method.
// NOTE(review): tests presumably need to set an expectation for it before
// the mock is cloned; confirm against the mockall documentation.
impl Clone for NodeCache {
fn clone(&self) -> Self;
}
}

#[async_trait]
impl NodeCache for Box<dyn NodeCache> {
async fn get(&self, node: &Node) -> bool {
Expand All @@ -240,6 +266,9 @@ impl NodeCache for Box<dyn NodeCache> {
async fn set(&self, node: &Node) {
self.as_ref().set(node).await;
}
async fn clear(&self) -> Result<()> {
self.as_ref().clear().await
}
fn name(&self) -> &'static str {
self.as_ref().name()
}
Expand All @@ -253,6 +282,9 @@ impl NodeCache for &dyn NodeCache {
async fn set(&self, node: &Node) {
(*self).set(node).await;
}
async fn clear(&self) -> Result<()> {
(*self).clear().await
}
}

#[cfg_attr(feature = "test-utils", automock)]
Expand Down
3 changes: 3 additions & 0 deletions swiftide-integrations/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ parquet = { workspace = true, optional = true, features = [
"snap",
] }
arrow = { workspace = true, optional = true }
redb = { workspace = true, optional = true }

[dev-dependencies]
swiftide-core = { path = "../swiftide-core", features = ["test-utils"] }
Expand Down Expand Up @@ -134,6 +135,8 @@ lancedb = ["dep:lancedb", "dep:deadpool", "dep:arrow-array"]
fluvio = ["dep:fluvio"]
# Parquet loader
parquet = ["dep:arrow-array", "dep:parquet", "dep:arrow"]
# Redb as an embeddable node cache
redb = ["dep:redb"]

[lints]
workspace = true
2 changes: 2 additions & 0 deletions swiftide-integrations/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ pub mod openai;
pub mod parquet;
#[cfg(feature = "qdrant")]
pub mod qdrant;
#[cfg(feature = "redb")]
pub mod redb;
#[cfg(feature = "redis")]
pub mod redis;
#[cfg(feature = "scraping")]
Expand Down
Loading