Skip to content

Commit

Permalink
feat: api improvements with example (#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
timonv authored Jun 13, 2024
1 parent 4fd8d89 commit 9ec93be
Show file tree
Hide file tree
Showing 9 changed files with 130 additions and 9 deletions.
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ incremental = true
debug = 0

[workspace]
members = ["swiftide"]
members = ["swiftide", "examples"]
resolver = "2"
14 changes: 14 additions & 0 deletions examples/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[package]
name = "examples"
version = "0.0.0"
publish = false
edition = "2021"

[dev-dependencies]
tokio = { version = "1.0", features = ["full"] }
swiftide = { path = "../swiftide/" }
tracing-subscriber = "0.3"

[[example]]
name = "ingest-codebase"
path = "ingest_codebase.rs"
69 changes: 69 additions & 0 deletions examples/ingest_codebase.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//! # [Swiftide] Example: ingesting the Swiftide codebase itself
//!
//! This example demonstrates how to ingest the Swiftide codebase itself.
//! Note that for it to work correctly you need to have `OPENAI_API_KEY` set, and Redis and Qdrant
//! running.
//!
//! The pipeline will:
//! - Load all `.rs` files from the current directory
//! - Skip any nodes previously processed; hashes are based on the path and chunk (not the
//! metadata!)
//! - Run metadata QA on each chunk; generating questions and answers and adding metadata
//! - Chunk the code into pieces of 10 to 2048 bytes
//! - Embed the chunks in batches of 10; metadata is embedded by default
//! - Store the nodes in Qdrant
//!
//! Note that metadata is copied over to smaller chunks when chunking. When making LLM requests
//! with lots of small chunks, consider the rate limits of the API.
//!
//! [Swiftide]: https://github.com/bosun-ai/swiftide
//! [examples]: https://github.com/bosun-ai/swiftide/blob/master/examples
use swiftide::{
ingestion,
integrations::{self, qdrant::Qdrant, redis::RedisNodeCache},
loaders::FileLoader,
transformers::{ChunkCode, MetadataQACode, OpenAIEmbed},
};

/// Entry point of the example: builds an OpenAI client, resolves the Redis and Qdrant URLs
/// from the environment, and runs the ingestion pipeline over the `.rs` files found under
/// the current directory.
///
/// # Errors
///
/// Returns the first error raised while building the OpenAI client, the Redis node cache,
/// the code chunker, the Qdrant storage, or while running the pipeline itself.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Used both as the Redis key prefix and as the Qdrant collection name.
    const NAMESPACE: &str = "swiftide-examples";

    tracing_subscriber::fmt::init();

    let openai_client = integrations::openai::OpenAI::builder()
        .default_embed_model("text-embedding-3-small")
        .default_prompt_model("gpt-3.5-turbo")
        .build()?;

    // Both services fall back to their local default address when the variable is unset.
    let redis_url = env_or("REDIS_URL", "redis://localhost:6379");
    let qdrant_url = env_or("QDRANT_URL", "http://localhost:6334");

    let loader = FileLoader::new(".").with_extensions(&["rs"]);

    ingestion::IngestionPipeline::from_loader(loader)
        // Drop nodes that the Redis-backed cache has already seen.
        .filter_cached(RedisNodeCache::try_from_url(redis_url, NAMESPACE)?)
        .then(MetadataQACode::new(openai_client.clone()))
        .then_chunk(ChunkCode::try_for_language_and_chunk_size("rust", 10..2048)?)
        .then_in_batch(10, OpenAIEmbed::new(openai_client.clone()))
        .store_with(
            Qdrant::try_from_url(qdrant_url)?
                .batch_size(50)
                .vector_size(1536)
                .collection_name(NAMESPACE.to_string())
                .build()?,
        )
        .run()
        .await?;

    Ok(())
}

/// Reads the environment variable `key`, returning `fallback` when it cannot be read.
fn env_or(key: &str, fallback: &str) -> String {
    std::env::var(key).unwrap_or_else(|_| fallback.to_owned())
}
4 changes: 4 additions & 0 deletions swiftide/src/ingestion/ingestion_pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ impl IngestionPipeline {

#[tracing::instrument(skip_all, fields(total_nodes), name = "ingestion_pipeline.run")]
pub async fn run(mut self) -> Result<()> {
tracing::info!(
"Starting ingestion pipeline with {} concurrency",
self.concurrency
);
let Some(ref storage) = self.storage else {
anyhow::bail!("No storage configured for ingestion pipeline")
};
Expand Down
24 changes: 24 additions & 0 deletions swiftide/src/integrations/openai/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,28 @@ impl OpenAIBuilder {
self.client = Some(Arc::new(client));
self
}

pub fn default_embed_model(&mut self, model: impl Into<String>) -> &mut Self {
if let Some(options) = self.default_options.as_mut() {
options.embed_model = Some(model.into());
} else {
self.default_options = Some(Options {
embed_model: Some(model.into()),
..Default::default()
});
}
self
}

pub fn default_prompt_model(&mut self, model: impl Into<String>) -> &mut Self {
if let Some(options) = self.default_options.as_mut() {
options.prompt_model = Some(model.into());
} else {
self.default_options = Some(Options {
prompt_model: Some(model.into()),
..Default::default()
});
}
self
}
}
6 changes: 3 additions & 3 deletions swiftide/src/integrations/qdrant/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pub struct Qdrant {
#[builder(default = "DEFAULT_COLLECTION_NAME.to_string()")]
collection_name: String,
vector_size: usize,
#[builder(default)]
#[builder(default, setter(strip_option))]
batch_size: Option<usize>,
}

Expand All @@ -26,8 +26,8 @@ impl Qdrant {
QdrantBuilder::default()
}

pub fn try_from_url(url: &str) -> Result<QdrantBuilder> {
Ok(QdrantBuilder::default().client(QdrantClient::from_url(url).build()?))
pub fn try_from_url(url: impl AsRef<str>) -> Result<QdrantBuilder> {
Ok(QdrantBuilder::default().client(QdrantClient::from_url(url.as_ref()).build()?))
}

pub async fn create_index_if_not_exists(&self) -> Result<()> {
Expand Down
6 changes: 3 additions & 3 deletions swiftide/src/integrations/redis/node_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ pub struct RedisNodeCache {
}

impl RedisNodeCache {
pub fn try_from_url(url: &str, prefix: &str) -> Result<Self> {
let client = redis::Client::open(url).context("Failed to open redis client")?;
pub fn try_from_url(url: impl AsRef<str>, prefix: impl AsRef<str>) -> Result<Self> {
let client = redis::Client::open(url.as_ref()).context("Failed to open redis client")?;
Ok(Self {
client,
connection_manager: RwLock::new(None),
key_prefix: prefix.to_string(),
key_prefix: prefix.as_ref().to_string(),
})
}

Expand Down
4 changes: 2 additions & 2 deletions swiftide/src/transformers/chunk_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ impl ChunkCode {
})
}

pub fn for_language_and_chunk_size(
lang: impl Into<SupportedLanguages>,
pub fn try_for_language_and_chunk_size(
lang: impl TryInto<SupportedLanguages>,
chunk_size: impl Into<ChunkSize>,
) -> Result<Self> {
Ok(Self {
Expand Down

0 comments on commit 9ec93be

Please sign in to comment.