feat(llm): Ollama integration
Work in progress on supporting local LLMs for enhanced privacy and cost efficiency

Discussion #3
mbarinov committed Oct 13, 2024
1 parent 669548a commit 71b27f9
Showing 4 changed files with 48 additions and 24 deletions.
1 change: 1 addition & 0 deletions package.json
@@ -12,6 +12,7 @@
"dependencies": {
"@langchain/community": "^0.3.1",
"@langchain/core": "^0.3.3",
"@langchain/ollama": "^0.1.0",
"@langchain/openai": "^0.3.0",
"@prisma/client": "5.20.0",
"@radix-ui/react-alert-dialog": "^1.1.1",
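The new @langchain/ollama package supplies the ChatOllama chat model and the OllamaEmbeddings class used in the source changes below. A minimal smoke test, not part of this commit, assuming an Ollama server is running at its default http://localhost:11434 with the llama3.2:3b model already pulled:

import { ChatOllama, OllamaEmbeddings } from "@langchain/ollama";

// Chat model served by the local Ollama instance.
const llm = new ChatOllama({
  model: "llama3.2:3b",
  baseUrl: "http://localhost:11434", // Ollama's default endpoint
});

// Embeddings backed by the same local model.
const embeddings = new OllamaEmbeddings({
  model: "llama3.2:3b",
  baseUrl: "http://localhost:11434",
});

const reply = await llm.invoke("Reply with one word if you can hear me.");
console.log(reply.content);

const vector = await embeddings.embedQuery("hello world");
console.log(`embedding dimensions: ${vector.length}`);

If the printed dimensionality differs from OpenAI's text-embedding-3-small, repositories indexed before this change would likely need re-indexing, since vectors already stored via PrismaVectorStore would no longer be comparable.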
35 changes: 30 additions & 5 deletions pnpm-lock.yaml

(Generated lockfile; diff not rendered by default.)

23 changes: 11 additions & 12 deletions src/app/api/chat/route.ts
@@ -1,8 +1,9 @@
 import {NextRequest, NextResponse} from "next/server";
 import {Message as VercelChatMessage} from "ai";

-import {ChatOpenAI, OpenAIEmbeddings} from "@langchain/openai";
-import {SystemMessagePromptTemplate} from "@langchain/core/prompts";
+// import {ChatOpenAI, OpenAIEmbeddings} from "@langchain/openai";
+import {ChatOllama, OllamaEmbeddings} from "@langchain/ollama"
+import {PromptTemplate} from "@langchain/core/prompts";
 import {RunnablePassthrough, RunnableSequence} from "@langchain/core/runnables";
 import {HttpResponseOutputParser} from "langchain/output_parsers";
 import {PrismaVectorStore} from "@langchain/community/vectorstores/prisma";
@@ -38,15 +39,13 @@ export async function POST(req: NextRequest) {
         throw new Error("Repository not found");
     }

-    const llm = new ChatOpenAI({
-        model: "gpt-4o-mini",
-        temperature: 0,
-        apiKey
+    const llm = new ChatOllama({
+        model: "llama3.2:3b",
     });

-    const embeddings = new OpenAIEmbeddings({
-        model: "text-embedding-3-small",
-        apiKey
+    const embeddings = new OllamaEmbeddings({
+        model: "llama3.2:3b", // Default value
+        baseUrl: "http://localhost:11434", // Default value
     });

     const vectorStore = PrismaVectorStore.withModel<Document>(db).create(
@@ -73,7 +72,7 @@ export async function POST(req: NextRequest) {
         searchType: "similarity"
     });

-    const systemPrompt = SystemMessagePromptTemplate.fromTemplate(`
+    const prompt = PromptTemplate.fromTemplate(`
 You are a helpful assistant with good knowledge in coding. Use the provided context and previous conversation to answer user questions with detailed explanations.
 Read the given context before answering questions and think step by step. If you cannot answer a user question based on the provided context, inform the user. Do not use any other information for answering.
@@ -83,7 +82,7 @@ export async function POST(req: NextRequest) {
 {chat_history}
 User: {question}
-    `);
+`);

     const chain = RunnableSequence.from([
         RunnablePassthrough.assign({
@@ -99,7 +98,7 @@ export async function POST(req: NextRequest) {
                 return input.chat_history || [];
             }
         }),
-        systemPrompt,
+        prompt,
         llm,
         new HttpResponseOutputParser(),
     ]);
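For orientation, a stripped-down sketch, not the actual route, of how the swapped-in pieces fit together: PromptTemplate takes over from the system-message template, ChatOllama replaces ChatOpenAI (no apiKey required), and HttpResponseOutputParser still turns the model output into a streamable HTTP response. The retriever and chat-history wiring from the full file are omitted, and the placeholder values are hypothetical:

import { ChatOllama } from "@langchain/ollama";
import { PromptTemplate } from "@langchain/core/prompts";
import { RunnableSequence } from "@langchain/core/runnables";
import { HttpResponseOutputParser } from "langchain/output_parsers";

const prompt = PromptTemplate.fromTemplate(`
Use the provided context to answer the question.

Context: {context}
User: {question}
`);

const llm = new ChatOllama({ model: "llama3.2:3b", temperature: 0 });

// prompt -> local model -> HTTP-friendly byte stream
const chain = RunnableSequence.from([prompt, llm, new HttpResponseOutputParser()]);

const stream = await chain.stream({
  context: "…retrieved document chunks…",   // normally supplied by the vector store retriever
  question: "How is the repository indexed?",
});
// return new Response(stream); // roughly what the route handler returns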
13 changes: 6 additions & 7 deletions src/services/indexer/index.ts
@@ -2,7 +2,7 @@ import {
     GithubRepoLoader
 } from "@langchain/community/document_loaders/web/github";
 import {RecursiveCharacterTextSplitter} from "langchain/text_splitter";
-import {OpenAIEmbeddings} from "@langchain/openai";
+import { OllamaEmbeddings } from "@langchain/ollama";
 import {PrismaVectorStore} from "@langchain/community/vectorstores/prisma";
 import {Prisma, RepositoryStatus, PrismaClient, Document} from "@prisma/client";
 import {Document as LangchainDocument} from "langchain/document"; // Adjust the import path if necessary
@@ -167,8 +167,8 @@ export class Indexer {
      */
     private async splitDocuments(docs: LangchainDocument[]): Promise<LangchainDocument[]> {
         const splitter = new RecursiveCharacterTextSplitter({
-            chunkSize: 2000,
-            chunkOverlap: 200
+            chunkSize: 500,
+            chunkOverlap: 100
         });

         const chunks = await splitter.splitDocuments(docs);
@@ -185,10 +185,9 @@
     private async storeChunks(chunks: LangchainDocument[], namespace: string, repoUrl: string) {
         console.log(`[${new Date().toISOString()}] Storing ${chunks.length} chunks into the vector store`);

-        const openAiKey = await this.getOpenAiToken();
-        const embeddings = new OpenAIEmbeddings({
-            model: "text-embedding-3-small",
-            apiKey: openAiKey,
+        const embeddings = new OllamaEmbeddings({
+            model: "llama3.2:3b", // Default value
+            baseUrl: "http://localhost:11434", // Default value
         });

         const vectorStore = PrismaVectorStore.withModel<Document>(this.db).create(
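The indexer now splits documents into much smaller chunks: 500 characters with 100 of overlap instead of 2000/200, presumably to better suit the local embedding model. A standalone sketch of the new splitter settings, with a made-up input document for illustration:

import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { Document as LangchainDocument } from "langchain/document";

const splitter = new RecursiveCharacterTextSplitter({
  chunkSize: 500,    // was 2000 with OpenAI embeddings
  chunkOverlap: 100, // was 200
});

// Stand-in for the documents GithubRepoLoader would fetch.
const docs = [
  new LangchainDocument({
    pageContent: "…file contents from the cloned repository…",
    metadata: { source: "README.md" },
  }),
];

const chunks = await splitter.splitDocuments(docs);
console.log(`${docs.length} document(s) -> ${chunks.length} chunk(s)`);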
