
Commit

working example
santiagomed committed Nov 1, 2023
1 parent d28c4fd commit f8b9842
Showing 4 changed files with 14 additions and 17 deletions.
Cargo.toml (1 addition, 1 deletion)
@@ -4,4 +4,4 @@ resolver = "1"
 members = [
     "orca",
     "examples/*"
-]
+]
examples/pdf/Cargo.toml (2 additions, 0 deletions)
@@ -11,3 +11,5 @@ anyhow = "1.0.75"
 tokio = { version = "1.12.0", features = ["full"] }
 clap = "4.4.7"
 serde_json = "1.0.108"
+env_logger = "0.10.0"
+rayon = "1.8.0"
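The two new dependencies back the main.rs changes below: env_logger adds RUST_LOG-controlled logging (initialized with env_logger::init()), and rayon brings in data-parallel iterators (main.rs now imports rayon::prelude::*, though the visible hunks don't show where it is used). Purely as a generic illustration — not the commit's actual code — a rayon par_iter could parallelize per-chunk preprocessing like this:

use rayon::prelude::*;

// Hypothetical per-chunk preprocessing step (placeholder, not from the commit).
fn normalize(chunk: &str) -> String {
    chunk.split_whitespace().collect::<Vec<_>>().join(" ")
}

fn main() {
    let chunks = vec!["first   chunk".to_string(), "second\tchunk".to_string()];
    // par_iter() fans the map out over rayon's thread pool; collect preserves order.
    let cleaned: Vec<String> = chunks.par_iter().map(|c| normalize(c)).collect();
    println!("{:?}", cleaned);
}

Assuming clap derives long flags from the Args fields used below (file, prompt, collection), the example would be run with something like RUST_LOG=info cargo run -- --file naval-book.pdf --prompt "..." — the exact flags depend on clap attributes not visible in this diff.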
examples/pdf/naval-book.pdf (binary file added)
examples/pdf/src/main.rs (11 additions, 16 deletions)
@@ -10,11 +10,13 @@ use orca::chains::Chain;
 use orca::llm::bert::Bert;
 use orca::llm::openai::OpenAI;
 use orca::llm::Embedding;
-use orca::prompt;
 use orca::qdrant::Qdrant;
 use orca::qdrant::Value;
 use orca::record::pdf;
 use orca::record::pdf::Pdf;
 use orca::record::Spin;
+use orca::{prompt, prompts};
+use rayon::prelude::*;
 use serde_json::json;

 #[derive(Parser, Debug)]
@@ -38,30 +40,30 @@ struct Args {
 async fn main() -> Result<()> {
     let args = Args::parse();

-    // print pwd
-    println!("pwd: {:?}", std::env::current_dir()?);
+    // init logger
+    env_logger::init();

     let collection = if let Some(col) = args.collection {
         col
     } else {
         args.file.split("/").last().unwrap().split(".").next().unwrap().to_string()
     };

-    let pdf_records = Pdf::from_file(&args.file, false).spin()?.split(1000);
+    let pdf_records = Pdf::from_file(&args.file, false).spin()?.split(500);
     let bert = Bert::new().build_model_and_tokenizer().await?;

     let qdrant = Qdrant::new("localhost", 6334);
     if qdrant.create_collection(&collection, 384).await.is_ok() {
         let mut embeddings = Vec::new();
         for record in &pdf_records {
             let embedding = bert.generate_embedding(prompt!(record)).await?;
-            embeddings.push(embedding.get_embedding()?);
+            embeddings.push(embedding.to_vec()?);
         }
-        qdrant.insert_many(&collection, embeddings, pdf_records).await?;
+        qdrant.insert_many(&collection, embeddings.clone(), pdf_records).await?;
     }

     let query_embedding = bert.generate_embedding(prompt!(args.prompt)).await?;
-    let result = qdrant.search(&collection, query_embedding.get_embedding()?, 5, None).await?;
+    let result = qdrant.search(&collection, query_embedding.to_vec()?.clone(), 5, None).await?;

     let context = json!({
         "user_prompt": args.prompt,
@@ -76,26 +78,19 @@ async fn main() -> Result<()> {
             .collect::<Vec<String>>()
     });

-    println!("Context: {:#?}", context);
-
     let prompt_for_model = r#"
     {{#chat}}
     {{#system}}
-    You are a highly advanced assistant. You receive a prompt from a user and relevant excerpts extracted from a PDF.
-    You then answer truthfully to the best of your ability. If you do not know the answer, your response is "I don't know".
+    You are a highly advanced assistant. You receive a prompt from a user and relevant excerpts extracted from a PDF. You then answer truthfully to the best of your ability. If you do not know the answer, your response is I don't know.
     {{/system}}
     {{#user}}
-    '{{user_prompt}}'.
+    {{user_prompt}}
     {{/user}}
     {{#system}}
     Based on the retrieved information from the PDF, here are the relevant excerpts:
     {{#each payloads}}
     {{this}}
     {{/each}}
-    Please provide a comprehensive answer to the user's question, integrating insights from these excerpts and your general knowledge.
     {{/system}}
     {{/chat}}
 … (remainder of this hunk not shown)
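The prompt_for_model string uses Handlebars-style sections: {{#each payloads}} iterates over the retrieved excerpts in the context JSON built above, and {{user_prompt}} interpolates the question. Orca ships its own prompt templating (the prompt! and prompts macros), so purely as an illustrative sketch — using the standalone handlebars crate, a simplified template without the {{#chat}}/{{#system}} helpers, and made-up data — the iteration renders like this:

use handlebars::Handlebars;
use serde_json::json;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Simplified stand-in for prompt_for_model: just the question and the excerpts.
    let template = r#"User question: {{user_prompt}}
Relevant excerpts:
{{#each payloads}}
- {{this}}
{{/each}}"#;

    // Hypothetical data mirroring the shape of the json!() context built in main.rs.
    let context = json!({
        "user_prompt": "What does the book say about leverage?",
        "payloads": ["Excerpt one...", "Excerpt two..."]
    });

    // render_template compiles and renders the template against the JSON data.
    let rendered = Handlebars::new().render_template(template, &context)?;
    println!("{}", rendered);
    Ok(())
}

The actual template additionally wraps these sections in {{#chat}}, {{#system}}, and {{#user}} blocks, which are orca-specific helpers (presumably splitting the rendered text into chat roles for the model); that machinery is not reproduced here.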
