Feat: update packages and bugfix #37

Merged · 3 commits · Jul 18, 2024

Changes from all commits
8 changes: 4 additions & 4 deletions backend/backend/app/api/routers/chat.py

@@ -4,10 +4,10 @@
 from fastapi import APIRouter, Depends, HTTPException, Request, status
 from fastapi.responses import StreamingResponse
 from fastapi.websockets import WebSocketDisconnect
-from llama_index.llms.base import ChatMessage
-from llama_index.llms.types import MessageRole
-from llama_index.memory import ChatMemoryBuffer
-from llama_index.prompts import PromptTemplate
+from llama_index.core.llms import ChatMessage
+from llama_index.core.llms import MessageRole
+from llama_index.core.memory import ChatMemoryBuffer
+from llama_index.core import PromptTemplate
 from pydantic import BaseModel

 from backend.app.utils import auth
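These import changes track the LlamaIndex v0.10 reorganization, which moved the core abstractions under the llama_index.core namespace. A minimal sketch of the relocated classes in use (the token limit and messages below are illustrative, not taken from this PR):

from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.memory import ChatMemoryBuffer

# Build a small chat history with the relocated core classes
memory = ChatMemoryBuffer.from_defaults(token_limit=3072)
memory.put(ChatMessage(role=MessageRole.USER, content="Hello!"))
memory.put(ChatMessage(role=MessageRole.ASSISTANT, content="Hi! How can I help?"))
history = memory.get()  # returns the buffered messages, trimmed to the token limit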
2 changes: 1 addition & 1 deletion backend/backend/app/api/routers/query.py

@@ -4,7 +4,7 @@
 from fastapi import APIRouter, Depends, HTTPException, Request, status
 from fastapi.responses import StreamingResponse
 from fastapi.websockets import WebSocketDisconnect
-from llama_index.llms.types import MessageRole
+from llama_index.core.llms import MessageRole
 from pydantic import BaseModel

 from backend.app.utils import auth
4 changes: 2 additions & 2 deletions backend/backend/app/api/routers/search.py

@@ -2,8 +2,8 @@
 import re

 from fastapi import APIRouter, Depends, HTTPException, status
-from llama_index.postprocessor import SimilarityPostprocessor
-from llama_index.retrievers import VectorIndexRetriever
+from llama_index.core.postprocessor import SimilarityPostprocessor
+from llama_index.core.retrievers import VectorIndexRetriever

 from backend.app.utils import auth
 from backend.app.utils.index import get_index
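These two relocated classes typically work together in a search endpoint: the retriever pulls the top-k nodes from the vector index, then the postprocessor drops weak matches. A rough sketch under the new import paths (the index object, top-k, and cutoff values are assumptions, not taken from this PR):

from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.retrievers import VectorIndexRetriever

# index: an existing VectorStoreIndex, e.g. the one returned by get_index()
retriever = VectorIndexRetriever(index=index, similarity_top_k=5)
nodes = retriever.retrieve("user search query")
# Keep only nodes scoring above the similarity cutoff
results = SimilarityPostprocessor(similarity_cutoff=0.7).postprocess_nodes(nodes)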
6 changes: 3 additions & 3 deletions backend/backend/app/utils/contants.py

@@ -29,20 +29,20 @@
 MODEL_KWARGS = {"n_gpu_layers": 100} if DEVICE_TYPE == "cuda" else {}

 # Service Context Constants
-CHUNK_SIZE = 1000
+CHUNK_SIZE = 1024
 CHUNK_OVERLAP = 100

 # Embedding Model Constants
 EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 EMBED_POOLING = "mean"
 EMBED_MODEL_DIMENSIONS = 384  # MiniLM-L6-v2 uses 384 dimensions
 DEF_EMBED_MODEL_DIMENSIONS = (
-    1536  # Default embedding model dimensions used by OpenAI text-embedding-ada-002
+    1536  # Default embedding model dimensions used by OpenAI text-embedding-3-small
 )
 EMBED_BATCH_SIZE = 64  # batch size for openai embeddings

 # Chat Memory Buffer Constants
-MEMORY_TOKEN_LIMIT = 1500 if USE_LOCAL_LLM else 6144
+MEMORY_TOKEN_LIMIT = 3072 if USE_LOCAL_LLM else 6144

 # Prompt Helper Constants
 # set maximum input size
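The 1536 constant stays valid across the comment change: text-embedding-3-small returns 1536-dimensional vectors by default, the same as text-embedding-ada-002. A quick sanity check, not part of this PR (assumes OPENAI_API_KEY is set in the environment):

from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingModelType

embed_model = OpenAIEmbedding(model=OpenAIEmbeddingModelType.TEXT_EMBED_3_SMALL)
vector = embed_model.get_text_embedding("dimension check")
assert len(vector) == 1536  # matches DEF_EMBED_MODEL_DIMENSIONS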
25 changes: 14 additions & 11 deletions backend/backend/app/utils/index.py

@@ -2,23 +2,23 @@
 import os

 from dotenv import load_dotenv
-from llama_index import (
+from llama_index.core import (
     PromptHelper,
     ServiceContext,
     # Document,
     SimpleDirectoryReader,
     StorageContext,
     VectorStoreIndex,
     load_index_from_storage,
     set_global_service_context,
 )
-from llama_index.embeddings import HuggingFaceEmbedding
-from llama_index.embeddings.openai import OpenAIEmbedding
-from llama_index.llms import LlamaCPP, OpenAI
-from llama_index.llms.llama_utils import (
+from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingModelType
+from llama_index.legacy.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.legacy.llms.llama_utils import (
     completion_to_prompt,
     messages_to_prompt,
 )
+from llama_index.llms.llama_cpp import LlamaCPP
+from llama_index.llms.openai import OpenAI
 from llama_index.vector_stores.supabase import SupabaseVectorStore
 from vecs import IndexMeasure

@@ -45,7 +45,6 @@
     USE_LOCAL_VECTOR_STORE,
 )

-# from llama_index.vector_stores.supabase import SupabaseVectorStore
 # import textwrap

 load_dotenv()

@@ -98,7 +97,11 @@
         api_key=os.getenv("OPENAI_API_KEY"),
     )
     # By default, LlamaIndex uses text-embedding-ada-002 from OpenAI
-    embed_model = OpenAIEmbedding(embed_batch_size=EMBED_BATCH_SIZE)
+    # Set the model to text-embed-3-small for better performance and cheaper cost
+    embed_model = OpenAIEmbedding(
+        model=OpenAIEmbeddingModelType.TEXT_EMBED_3_SMALL,
+        embed_batch_size=EMBED_BATCH_SIZE,
+    )

     prompt_helper = PromptHelper(
         chunk_size_limit=CHUNK_SIZE_LIMIT,

@@ -149,11 +152,11 @@ def create_index():
             show_progress=True,
         )
         # store it for later
-        index.storage_context.persist(STORAGE_DIR)
-        logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
+        index.storage_context.persist(new_storage_dir)
+        logger.info(f"Finished creating new index. Stored in {new_storage_dir}")
     else:
         # do nothing
-        logger.info(f"Index already exist at {STORAGE_DIR}...")
+        logger.info(f"Index already exist at {new_storage_dir}...")
     # else, create & store the index in Supabase pgvector
     else:
         # get the folders in the data directory
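The reshuffled imports reflect the v0.10 split of LlamaIndex into llama_index.core, a llama_index.legacy compatibility layer, and per-integration packages (llama-index-llms-openai, llama-index-llms-llama-cpp, llama-index-embeddings-openai, llama-index-vector-stores-supabase). The persist/reload cycle that create_index() relies on looks roughly like this, where index and new_storage_dir stand in for the objects built above (note the model name in the new comment is properly spelled text-embedding-3-small):

from llama_index.core import StorageContext, load_index_from_storage

# Persist a freshly built index to disk...
index.storage_context.persist(persist_dir=new_storage_dir)

# ...and reload it on a later startup instead of re-embedding the documents
storage_context = StorageContext.from_defaults(persist_dir=new_storage_dir)
index = load_index_from_storage(storage_context)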
2 changes: 1 addition & 1 deletion backend/backend/app/utils/prompt_template.py

@@ -4,7 +4,7 @@
 This seems to have significant impact on the output of the LLM.
 """

-from llama_index.prompts import PromptTemplate
+from llama_index.core import PromptTemplate

 # this is specific to Llama-2.
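Under the new import path, PromptTemplate behaves the same as before. A minimal sketch with a Llama-2-style wrapper (the template text below is illustrative, not the one defined in this file):

from llama_index.core import PromptTemplate

# Llama-2 instruction format: system prompt inside <<SYS>>, user text inside [INST]
template = PromptTemplate(
    "<s>[INST] <<SYS>>\nYou are a helpful assistant.\n<</SYS>>\n\n{query_str} [/INST]"
)
prompt = template.format(query_str="Summarize the indexed documents.")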