V.0.3.1.8 🐛 Bugfixes (#37)
* Updated Packages

* Bugfix: Check if request exists before deleting

* Bugfix: Update Collections not working

Changed the PUT request to a POST request and moved the endpoint to a sub API route
xKhronoz authored Jul 18, 2024
1 parent c9a51c1 commit 63dc01e
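
The "Update Collections" fix itself is not among the diffs shown below, so here is a hypothetical sketch of the change the commit message describes: the update endpoint moves from a top-level PUT to a POST on a sub API route. The router prefix, path, and CollectionUpdate model are assumptions for illustration, not the repository's actual code.

# Hypothetical sketch of the PUT -> POST change described in the commit message.
# Router prefix, route path, and CollectionUpdate model are assumed for illustration.
from typing import Optional

from fastapi import APIRouter
from pydantic import BaseModel

collections_router = APIRouter(prefix="/collections")


class CollectionUpdate(BaseModel):
    name: str
    description: Optional[str] = None


# before (roughly): @collections_router.put("/{collection_id}")
# after: a POST on a dedicated sub route
@collections_router.post("/{collection_id}/update")
async def update_collection(collection_id: str, payload: CollectionUpdate):
    # a real handler would first check that the collection exists, then update it
    return {"id": collection_id, "updated": True}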
Showing 12 changed files with 902 additions and 409 deletions.
8 changes: 4 additions & 4 deletions backend/backend/app/api/routers/chat.py

@@ -4,10 +4,10 @@
 from fastapi import APIRouter, Depends, HTTPException, Request, status
 from fastapi.responses import StreamingResponse
 from fastapi.websockets import WebSocketDisconnect
-from llama_index.llms.base import ChatMessage
-from llama_index.llms.types import MessageRole
-from llama_index.memory import ChatMemoryBuffer
-from llama_index.prompts import PromptTemplate
+from llama_index.core.llms import ChatMessage
+from llama_index.core.llms import MessageRole
+from llama_index.core.memory import ChatMemoryBuffer
+from llama_index.core import PromptTemplate
 from pydantic import BaseModel

 from backend.app.utils import auth
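
These are the v0.10-style package paths, where LlamaIndex's core abstractions live under llama_index.core. A minimal sketch of the renamed imports in use, assuming the token limit value is illustrative:

from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.memory import ChatMemoryBuffer

# buffer that truncates chat history to a token budget
memory = ChatMemoryBuffer.from_defaults(token_limit=3072)
memory.put(ChatMessage(role=MessageRole.USER, content="What changed in v0.3.1.8?"))
print(memory.get())  # -> list of ChatMessage objects within the token limit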
2 changes: 1 addition & 1 deletion backend/backend/app/api/routers/query.py

@@ -4,7 +4,7 @@
 from fastapi import APIRouter, Depends, HTTPException, Request, status
 from fastapi.responses import StreamingResponse
 from fastapi.websockets import WebSocketDisconnect
-from llama_index.llms.types import MessageRole
+from llama_index.core.llms import MessageRole
 from pydantic import BaseModel

 from backend.app.utils import auth
4 changes: 2 additions & 2 deletions backend/backend/app/api/routers/search.py

@@ -2,8 +2,8 @@
 import re

 from fastapi import APIRouter, Depends, HTTPException, status
-from llama_index.postprocessor import SimilarityPostprocessor
-from llama_index.retrievers import VectorIndexRetriever
+from llama_index.core.postprocessor import SimilarityPostprocessor
+from llama_index.core.retrievers import VectorIndexRetriever

 from backend.app.utils import auth
 from backend.app.utils.index import get_index
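
The same migration applies to the retriever and node postprocessor used by the search route. A minimal sketch of how these two classes typically work together, assuming an embedding model is configured and using an illustrative document and similarity cutoff:

from llama_index.core import Document, VectorStoreIndex
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.retrievers import VectorIndexRetriever

index = VectorStoreIndex.from_documents([Document(text="hello world")])
retriever = VectorIndexRetriever(index=index, similarity_top_k=5)
nodes = retriever.retrieve("hello")
# drop weak matches below the similarity threshold
nodes = SimilarityPostprocessor(similarity_cutoff=0.7).postprocess_nodes(nodes)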
6 changes: 3 additions & 3 deletions backend/backend/app/utils/contants.py

@@ -29,20 +29,20 @@
 MODEL_KWARGS = {"n_gpu_layers": 100} if DEVICE_TYPE == "cuda" else {}

 # Service Context Constants
-CHUNK_SIZE = 1000
+CHUNK_SIZE = 1024
 CHUNK_OVERLAP = 100

 # Embedding Model Constants
 EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 EMBED_POOLING = "mean"
 EMBED_MODEL_DIMENSIONS = 384  # MiniLM-L6-v2 uses 384 dimensions
 DEF_EMBED_MODEL_DIMENSIONS = (
-    1536  # Default embedding model dimensions used by OpenAI text-embedding-ada-002
+    1536  # Default embedding model dimensions used by OpenAI text-embedding-3-small
 )
 EMBED_BATCH_SIZE = 64  # batch size for openai embeddings

 # Chat Memory Buffer Constants
-MEMORY_TOKEN_LIMIT = 1500 if USE_LOCAL_LLM else 6144
+MEMORY_TOKEN_LIMIT = 3072 if USE_LOCAL_LLM else 6144

 # Prompt Helper Constants
 # set maximum input size
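
CHUNK_SIZE and CHUNK_OVERLAP control how documents are split before embedding; the bump from 1000 to 1024 matches the common power-of-two default. A sketch of how such constants are typically consumed, using llama_index.core's sentence splitter (illustrative wiring, not necessarily this repository's):

from llama_index.core import Document
from llama_index.core.node_parser import SentenceSplitter

# split into ~1024-token chunks with 100 tokens of overlap between neighbors
splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=100)
nodes = splitter.get_nodes_from_documents([Document(text="some long text ...")])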
25 changes: 14 additions & 11 deletions backend/backend/app/utils/index.py

@@ -2,23 +2,23 @@
 import os

 from dotenv import load_dotenv
-from llama_index import (
+from llama_index.core import (
     PromptHelper,
     ServiceContext,
     # Document,
     SimpleDirectoryReader,
     StorageContext,
     VectorStoreIndex,
     load_index_from_storage,
     set_global_service_context,
 )
-from llama_index.embeddings import HuggingFaceEmbedding
-from llama_index.embeddings.openai import OpenAIEmbedding
-from llama_index.llms import LlamaCPP, OpenAI
-from llama_index.llms.llama_utils import (
+from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingModelType
+from llama_index.legacy.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.legacy.llms.llama_utils import (
     completion_to_prompt,
     messages_to_prompt,
 )
+from llama_index.llms.llama_cpp import LlamaCPP
+from llama_index.llms.openai import OpenAI
+from llama_index.vector_stores.supabase import SupabaseVectorStore
 from vecs import IndexMeasure

@@ -45,7 +45,6 @@
     USE_LOCAL_VECTOR_STORE,
 )

-# from llama_index.vector_stores.supabase import SupabaseVectorStore
 # import textwrap

 load_dotenv()

@@ -98,7 +97,11 @@
         api_key=os.getenv("OPENAI_API_KEY"),
     )
     # By default, LlamaIndex uses text-embedding-ada-002 from OpenAI
-    embed_model = OpenAIEmbedding(embed_batch_size=EMBED_BATCH_SIZE)
+    # Set the model to text-embed-3-small for better performance and cheaper cost
+    embed_model = OpenAIEmbedding(
+        model=OpenAIEmbeddingModelType.TEXT_EMBED_3_SMALL,
+        embed_batch_size=EMBED_BATCH_SIZE,
+    )

     prompt_helper = PromptHelper(
         chunk_size_limit=CHUNK_SIZE_LIMIT,

@@ -149,11 +152,11 @@ def create_index():
             show_progress=True,
         )
         # store it for later
-        index.storage_context.persist(STORAGE_DIR)
-        logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
+        index.storage_context.persist(new_storage_dir)
+        logger.info(f"Finished creating new index. Stored in {new_storage_dir}")
     else:
         # do nothing
-        logger.info(f"Index already exist at {STORAGE_DIR}...")
+        logger.info(f"Index already exist at {new_storage_dir}...")
     # else, create & store the index in Supabase pgvector
     else:
         # get the folders in the data directory
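
A minimal sketch of the new embedding configuration in isolation: switching OpenAIEmbedding from the default text-embedding-ada-002 to text-embedding-3-small, which also produces 1536-dimensional vectors. The API key is read from the environment and the probe string is illustrative.

import os

from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingModelType

embed_model = OpenAIEmbedding(
    model=OpenAIEmbeddingModelType.TEXT_EMBED_3_SMALL,
    embed_batch_size=64,
    api_key=os.getenv("OPENAI_API_KEY"),
)
print(len(embed_model.get_text_embedding("hello")))  # 1536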
2 changes: 1 addition & 1 deletion backend/backend/app/utils/prompt_template.py

@@ -4,7 +4,7 @@
 This seems to have significant impact on the output of the LLM.
 """

-from llama_index.prompts import PromptTemplate
+from llama_index.core import PromptTemplate

 # this is specific to Llama-2.
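
PromptTemplate itself is unchanged apart from its import path. A minimal usage sketch with an illustrative template string:

from llama_index.core import PromptTemplate

qa_template = PromptTemplate(
    "Answer using only this context:\n{context_str}\nQuestion: {query_str}\n"
)
prompt = qa_template.format(
    context_str="LlamaIndex v0.10 moved core imports under llama_index.core.",
    query_str="Where does PromptTemplate live now?",
)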