From f730525c6b37d850a8114efcbe7f6998437da056 Mon Sep 17 00:00:00 2001
From: Yee Kit
Date: Tue, 30 Jan 2024 17:26:49 +0800
Subject: [PATCH] Enhancements & Refactored Code (#10)

* Fix: Button loading state not reset on 'back'
* Feat: search post processing to filter by average score
* Feat: Updated api status error handling
* Feat: Updated error handling & timeout duration
* Refactored imports & moved global vars to constants.py
* Feat: Updated dockerfile to install llama-cpp-python with openblas support by default
* Add .env file and update gitignore, pipeline name, robots.txt, middleware, layout, page, sitemap, and navlink components
* Fixed Pipeline Name
* Updated Check File Size workflow
---
 .github/workflows/check-file-size.yml      |   2 +
 .github/workflows/pipeline.yml             |   3 +-
 Dockerfile                                 |  35 ++--
 backend/.gitignore                         |   1 +
 backend/backend/app/api/routers/chat.py    |   5 +-
 backend/backend/app/api/routers/query.py   |   5 +-
 backend/backend/app/api/routers/search.py  |  18 +-
 backend/backend/app/utils/contants.py      |  39 ++++
 backend/backend/app/utils/index.py         |  54 +++--
 backend/backend/main.py                    |  26 ++-
 backend/example.env                        |   1 +
 frontend/app/about/page.tsx                |   2 +-
 frontend/app/components/footer.tsx         |  40 ++++
 frontend/app/components/header.tsx         | 196 +++++-------------
 frontend/app/components/login-buttons.tsx  |   8 +-
 frontend/app/components/ui/mobilemenu.tsx  |  86 ++++++++
 frontend/app/components/ui/navlink.tsx     |  53 +++++
 .../app/components/ui/search/useSearch.tsx |  10 +-
 frontend/app/layout.tsx                    |   2 +
 frontend/app/page.tsx                      |   2 +-
 frontend/app/privacy-policy/page.tsx       |  92 ++++++++
 frontend/app/sign-in/page.tsx              |   2 +-
 frontend/app/status/page.tsx               |  11 +-
 frontend/app/terms-of-service/page.tsx     |  31 +++
 frontend/middleware.ts                     |   2 +-
 frontend/public/robots.txt                 |  14 ++
 frontend/public/sitemap.xml                |  20 ++
 27 files changed, 539 insertions(+), 221 deletions(-)
 create mode 100644 backend/backend/app/utils/contants.py
 create mode 100644 backend/example.env
 create mode 100644 frontend/app/components/footer.tsx
 create mode 100644 frontend/app/components/ui/mobilemenu.tsx
 create mode 100644 frontend/app/components/ui/navlink.tsx
 create mode 100644 frontend/app/privacy-policy/page.tsx
 create mode 100644 frontend/app/terms-of-service/page.tsx
 create mode 100644 frontend/public/robots.txt
 create mode 100644 frontend/public/sitemap.xml

diff --git a/.github/workflows/check-file-size.yml b/.github/workflows/check-file-size.yml
index a9fe522..d127759 100644
--- a/.github/workflows/check-file-size.yml
+++ b/.github/workflows/check-file-size.yml
@@ -12,6 +12,8 @@ jobs:
   check-file-size:
     runs-on: ubuntu-latest
     steps:
+      - name: Checkout
+        uses: actions/checkout@v4
       - name: Check large files
         uses: ppremk/lfs-warning@v3.2
         with:
diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml
index f76274b..399123c 100644
--- a/.github/workflows/pipeline.yml
+++ b/.github/workflows/pipeline.yml
@@ -1,4 +1,4 @@
-name: Test Build and Deploy
+name: Pipeline

 on:
   push:
@@ -6,6 +6,7 @@ on:
   # to run this workflow manually from the Actions tab
   workflow_dispatch:

+# Test, Build and Deploy the app
 jobs:
   check-file-size:
     uses: ./.github/workflows/check-file-size.yml
diff --git a/Dockerfile b/Dockerfile
index bef2e7f..e2c8245 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,7 +5,7 @@ FROM nvidia/cuda:${CUDA_IMAGE}
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user

-# Install the dependencies
+# Install the dependencies & clean up
 RUN apt-get update && apt-get upgrade -y \
     && apt-get install -y git build-essential
\ python3.11 gcc wget \ @@ -13,7 +13,10 @@ RUN apt-get update && apt-get upgrade -y \ cmake protobuf-compiler pkg-config \ libclblast-dev libopenblas-dev \ liblapack-dev liblapacke-dev libeigen3-dev libboost-all-dev \ - && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd + && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd \ + # Cleaning cache: + && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \ + && apt-get clean -y && rm -rf /var/lib/apt/lists/* # Install pip for python 3.11 RUN wget https://bootstrap.pypa.io/get-pip.py && \ @@ -23,21 +26,25 @@ RUN wget https://bootstrap.pypa.io/get-pip.py && \ # Switch to the user 'user' USER user -# Setting build related env vars -ENV CUDA_DOCKER_ARCH=all -ENV LLAMA_CUBLAS=1 - -# Set home to the user's home directory and Poetry's environment variables -ENV HOME=/home/user \ - PATH=/home/user/.local/bin:$PATH \ +# Setting build / container related env vars +ENV CUDA_DOCKER_ARCH=all \ + LLAMA_CUBLAS=1 \ + # Set home to the user's home directory and Poetry's environment variables + HOME=/home/user \ + PATH=/home/user/.local/bin:$PATH \ PYTHONUNBUFFERED=1 \ POETRY_NO_INTERACTION=1 \ POETRY_VIRTUALENVS_IN_PROJECT=1 \ POETRY_VIRTUALENVS_CREATE=1 \ POETRY_CACHE_DIR=/tmp/poetry_cache \ - # Build llama-cpp-python with default cuda support - CMAKE_ARGS="-DLLAMA_CUBLAS=on" - # CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" + # Set the uvicorn env + ENVIRONMENT=prod \ + ########################################################## + # Build llama-cpp-python with cuda support + # CMAKE_ARGS="-DLLAMA_CUBLAS=on" + # Build llama-cpp-python with openblas support on CPU + CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" + ########################################################## # Set the working directory to /app WORKDIR $HOME/app @@ -61,4 +68,8 @@ RUN poetry install --without dev,torch-cpu && \ # Change to the package directory WORKDIR $HOME/app/backend +# Make port 8000 available to the world outside this container +EXPOSE 8000 + +# Run the app when the container launches CMD ["poetry", "run", "uvicorn", "main:app", "--host", "0.0.0.0"] \ No newline at end of file diff --git a/backend/.gitignore b/backend/.gitignore index 069fcb4..0fb5f02 100644 --- a/backend/.gitignore +++ b/backend/.gitignore @@ -1,2 +1,3 @@ __pycache__ storage +.env \ No newline at end of file diff --git a/backend/backend/app/api/routers/chat.py b/backend/backend/app/api/routers/chat.py index ef9910c..2a32a1d 100644 --- a/backend/backend/app/api/routers/chat.py +++ b/backend/backend/app/api/routers/chat.py @@ -1,8 +1,6 @@ import logging from typing import List -from app.utils.index import get_index -from app.utils.json import json_to_model from fastapi import APIRouter, Depends, HTTPException, Request, status from fastapi.responses import StreamingResponse from fastapi.websockets import WebSocketDisconnect @@ -13,6 +11,9 @@ from llama_index.prompts import PromptTemplate from pydantic import BaseModel +from backend.app.utils.index import get_index +from backend.app.utils.json import json_to_model + chat_router = r = APIRouter() """ diff --git a/backend/backend/app/api/routers/query.py b/backend/backend/app/api/routers/query.py index a00e7bf..2defacb 100644 --- a/backend/backend/app/api/routers/query.py +++ b/backend/backend/app/api/routers/query.py @@ -1,8 +1,6 @@ import logging from typing import List -from app.utils.index import get_index -from 
app.utils.json import json_to_model from fastapi import APIRouter, Depends, HTTPException, Request, status from fastapi.responses import StreamingResponse from fastapi.websockets import WebSocketDisconnect @@ -10,6 +8,9 @@ from llama_index.llms.types import MessageRole from pydantic import BaseModel +from backend.app.utils.index import get_index +from backend.app.utils.json import json_to_model + query_router = r = APIRouter() """ diff --git a/backend/backend/app/api/routers/search.py b/backend/backend/app/api/routers/search.py index b234166..938be3b 100644 --- a/backend/backend/app/api/routers/search.py +++ b/backend/backend/app/api/routers/search.py @@ -1,12 +1,13 @@ import logging import re -from app.utils.index import get_index from fastapi import APIRouter, Depends, HTTPException, Request, status from llama_index import VectorStoreIndex from llama_index.postprocessor import SimilarityPostprocessor from llama_index.retrievers import VectorIndexRetriever +from backend.app.utils.index import get_index + search_router = r = APIRouter() """ @@ -36,17 +37,22 @@ async def search( index=index, similarity_top_k=10, ) - # similarity postprocessor: filter nodes below 0.45 similarity score - node_postprocessor = SimilarityPostprocessor(similarity_cutoff=0.45) # retrieve results query_results = retriever.retrieve(query) query_results_scores = [result.get_score() for result in query_results] + # get average score + average_score = sum(query_results_scores) / len(query_results_scores) + logger.info(f"Search results similarity score: {query_results_scores}") + logger.info(f"Average similarity score: {average_score}") + + # similarity postprocessor: filter nodes below 0.45 similarity score + node_postprocessor = SimilarityPostprocessor(similarity_cutoff=average_score) - # postprocess results + # postprocess results based on average score filtered_results = node_postprocessor.postprocess_nodes(query_results) filtered_results_scores = [result.get_score() for result in filtered_results] @@ -68,9 +74,7 @@ async def search( "^_+ | _+$", "", node_dict["text"] ) # remove leading and trailing underscores data["text"] = cleaned_text - data["similarity_score"] = round( - node.get_score(), 2 - ) # round to 2 decimal places + data["similarity_score"] = node.get_score() response.append(data) id += 1 diff --git a/backend/backend/app/utils/contants.py b/backend/backend/app/utils/contants.py new file mode 100644 index 0000000..ef166b7 --- /dev/null +++ b/backend/backend/app/utils/contants.py @@ -0,0 +1,39 @@ +######################################################################## +# Model Constants for the backend app # +######################################################################## +from pathlib import Path + +from torch.cuda import is_available as is_cuda_available + +# Model Constants +MAX_NEW_TOKENS = 4096 +CONTEXT_SIZE = MAX_NEW_TOKENS +DEVICE_TYPE = "cuda" if is_cuda_available() else "cpu" + +# Get the current directory +CUR_DIR = Path.cwd() + +STORAGE_DIR = str(CUR_DIR / "storage") # directory to cache the generated index +DATA_DIR = str(CUR_DIR / "data") # directory containing the documents to index + +# LLM Model Constants +LLM_MODEL_URL = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf" +# Model Kwargs +# set to at least 1 to use GPU, adjust according to your GPU memory, but must be able to fit the model +MODEL_KWARGS = {"n_gpu_layers": 100} if DEVICE_TYPE == "cuda" else {} + +# Service Context Constants +CHUNK_SIZE = 1000 +CHUNK_OVERLAP = 100 + +# 
Embedding Model Constants +EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2" +EMBED_POOLING = "mean" + +# Prompt Helper Constants +# set maximum input size +CHUNK_SIZE_LIMIT = MAX_NEW_TOKENS +# set number of output tokens +NUM_OUTPUT = 256 +# set maximum chunk overlap +CHUNK_OVERLAP_RATIO = 0.2 diff --git a/backend/backend/app/utils/index.py b/backend/backend/app/utils/index.py index 45350ee..7c59767 100644 --- a/backend/backend/app/utils/index.py +++ b/backend/backend/app/utils/index.py @@ -1,6 +1,5 @@ import logging import os -from pathlib import Path from llama_index import ( PromptHelper, @@ -17,29 +16,26 @@ completion_to_prompt, messages_to_prompt, ) -from torch.cuda import is_available as is_cuda_available -MAX_NEW_TOKENS = 4096 -CONTEXT_SIZE = MAX_NEW_TOKENS -MODEL_ID = "TheBloke/Llama-2-7B-Chat-GGUF" -DEVICE_TYPE = "cuda" if is_cuda_available() else "cpu" - -# Get the current directory -current_directory = Path.cwd() - -STORAGE_DIR = str( - current_directory / "storage" -) # directory to cache the generated index -DATA_DIR = str( - current_directory / "data" -) # directory containing the documents to index - - -# set to at least 1 to use GPU, adjust according to your GPU memory, but must be able to fit the model -model_kwargs = {"n_gpu_layers": 100} if DEVICE_TYPE == "cuda" else {} +from backend.app.utils.contants import ( + CHUNK_OVERLAP, + CHUNK_OVERLAP_RATIO, + CHUNK_SIZE, + CHUNK_SIZE_LIMIT, + CONTEXT_SIZE, + DATA_DIR, + DEVICE_TYPE, + EMBED_MODEL_NAME, + EMBED_POOLING, + LLM_MODEL_URL, + MAX_NEW_TOKENS, + MODEL_KWARGS, + NUM_OUTPUT, + STORAGE_DIR, +) llm = LlamaCPP( - model_url="https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf", + model_url=LLM_MODEL_URL, temperature=0.1, max_new_tokens=MAX_NEW_TOKENS, # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room @@ -47,7 +43,7 @@ # kwargs to pass to __call__() # generate_kwargs={}, # kwargs to pass to __init__() - model_kwargs=model_kwargs, + model_kwargs=MODEL_KWARGS, # transform inputs into Llama2 format messages_to_prompt=messages_to_prompt, completion_to_prompt=completion_to_prompt, @@ -63,22 +59,22 @@ max_chunk_overlap = 0.2 embed_model = HuggingFaceEmbedding( - model_name="sentence-transformers/all-MiniLM-L6-v2", - pooling="mean", + model_name=EMBED_MODEL_NAME, + pooling=EMBED_POOLING, device=DEVICE_TYPE, ) prompt_helper = PromptHelper( - chunk_size_limit=4096, - chunk_overlap_ratio=0.2, - num_output=256, + chunk_size_limit=CHUNK_SIZE_LIMIT, + chunk_overlap_ratio=CHUNK_OVERLAP_RATIO, + num_output=NUM_OUTPUT, ) service_context = ServiceContext.from_defaults( llm=llm, embed_model=embed_model, - chunk_size=1000, - chunk_overlap=100, + chunk_size=CHUNK_SIZE, + chunk_overlap=CHUNK_OVERLAP, prompt_helper=prompt_helper, ) diff --git a/backend/backend/main.py b/backend/backend/main.py index 65cb640..6f87bf9 100644 --- a/backend/backend/main.py +++ b/backend/backend/main.py @@ -1,29 +1,31 @@ import logging import os -from app.api.routers.chat import chat_router -from app.api.routers.healthcheck import healthcheck_router -from app.api.routers.query import query_router -from app.api.routers.search import search_router -from app.utils.index import create_index from dotenv import load_dotenv from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from torch.cuda import is_available as is_cuda_available +from backend.app.api.routers.chat import chat_router +from backend.app.api.routers.healthcheck import healthcheck_router 
+from backend.app.api.routers.query import query_router +from backend.app.api.routers.search import search_router +from backend.app.utils.index import create_index + load_dotenv() app = FastAPI() environment = os.getenv("ENVIRONMENT", "dev") # Default to 'development' if not set -# TODO: Add reading allowed origins from environment variables +# Add allowed origins from environment variables +allowed_origins = os.getenv("ALLOWED_ORIGINS", "*") if environment == "dev": logger = logging.getLogger("uvicorn") logger.warning("Running in development mode - allowing CORS for all origins") app.add_middleware( - CORSMiddleware, + middleware_class=CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], @@ -32,19 +34,15 @@ if environment == "prod": # In production, specify the allowed origins - allowed_origins = [ - "https://your-production-domain.com", - "https://another-production-domain.com", - # Add more allowed origins as needed - ] + allowed_origins = allowed_origins.split(",") if allowed_origins != "*" else ["*"] logger = logging.getLogger("uvicorn") logger.info(f"Running in production mode - allowing CORS for {allowed_origins}") app.add_middleware( - CORSMiddleware, + middleware_class=CORSMiddleware, allow_origins=allowed_origins, allow_credentials=True, - allow_methods=["GET", "POST", "PUT", "DELETE"], + allow_methods=["GET", "POST"], allow_headers=["*"], ) diff --git a/backend/example.env b/backend/example.env new file mode 100644 index 0000000..73f9e7f --- /dev/null +++ b/backend/example.env @@ -0,0 +1 @@ +ALLOWED_ORIGINS=http://localhost:3000 \ No newline at end of file diff --git a/frontend/app/about/page.tsx b/frontend/app/about/page.tsx index 79a886b..b2fb382 100644 --- a/frontend/app/about/page.tsx +++ b/frontend/app/about/page.tsx @@ -3,7 +3,7 @@ export default function About() { return ( -
+

About Smart Retrieval

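For reference, the average-score filtering that this patch introduces in backend/backend/app/api/routers/search.py (replacing the fixed 0.45 similarity cutoff) boils down to the following minimal Python sketch. It uses the same llama_index retriever and postprocessor calls shown in the diff above; the helper name and the empty-result guard are illustrative additions, not part of the patch.

    from llama_index.postprocessor import SimilarityPostprocessor
    from llama_index.retrievers import VectorIndexRetriever


    def filter_by_average_score(index, query: str, top_k: int = 10):
        # Retrieve the top-k nodes for the query
        retriever = VectorIndexRetriever(index=index, similarity_top_k=top_k)
        results = retriever.retrieve(query)
        if not results:
            return []
        # Use the mean similarity score as a dynamic cutoff instead of a fixed 0.45
        average_score = sum(r.get_score() for r in results) / len(results)
        node_postprocessor = SimilarityPostprocessor(similarity_cutoff=average_score)
        # Keep only the nodes whose score clears the average-score cutoff
        return node_postprocessor.postprocess_nodes(results)
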
diff --git a/frontend/app/components/footer.tsx b/frontend/app/components/footer.tsx new file mode 100644 index 0000000..4c99c46 --- /dev/null +++ b/frontend/app/components/footer.tsx @@ -0,0 +1,40 @@ +"use client"; + +import { FooterNavLink } from "./ui/navlink"; +import { IconGitHub } from "./ui/icons"; +import { Text, Cookie } from "lucide-react"; + +export default function Footer() { + return ( +
+
+
+

+ © 2024 JTC DBE. All rights reserved. +

+
+
+ +
+ + Github +
+
+ +
+ + Terms of Service +
+
+ +
+ + Privacy Policy +
+
+
+
+
+ ); +} + diff --git a/frontend/app/components/header.tsx b/frontend/app/components/header.tsx index 81553ec..7ebccf3 100644 --- a/frontend/app/components/header.tsx +++ b/frontend/app/components/header.tsx @@ -1,140 +1,47 @@ "use client"; -import Link from 'next/link'; import Image from 'next/image'; import { Home, InfoIcon, MessageCircle, Search, FileQuestion, Menu, X } from 'lucide-react'; -import { usePathname } from 'next/navigation'; import { useTheme } from "next-themes"; -import { useEffect, useState, useRef } from "react"; +import { useEffect, useState } from "react"; import { useMedia } from 'react-use'; -import useSWR from 'swr' -import logo from '../../public/smart-retrieval-logo.webp' - -interface NavLinkProps { - href: string; - children: React.ReactNode; - onClick?: () => void; // Include onClick as an optional prop -} - -interface MobileMenuProps { - isOpen: boolean; - onClose: () => void; -} - -const MobileMenu: React.FC = ({ isOpen, onClose }) => { - const isLargeScreen = useMedia('(min-width: 1024px)', false); - const menuRef = useRef(null); - - useEffect(() => { - const handleOutsideClick = (event: MouseEvent | TouchEvent) => { - if ( - !isLargeScreen && - isOpen && - !menuRef.current?.contains(event.target as Node) && - !((event.target as HTMLElement).closest('.toggle-button')) // Exclude the toggle button - ) { - onClose(); // Close the menu - } - }; - - if (!isLargeScreen && isOpen) { - // Add event listeners for both mouse and touch events - document.addEventListener('mousedown', handleOutsideClick); - } - - return () => { - // Remove the event listener when the component unmounts - document.removeEventListener('mousedown', handleOutsideClick); - }; - }, [isLargeScreen, isOpen, onClose]); - - useEffect(() => { - if (isLargeScreen && isOpen) { - onClose(); - } - }, [isLargeScreen, isOpen, onClose]); - return ( -
-
- Logo -
-
- {/* Mobile menu content */} -
- -
- - Home -
-
- -
- - About -
-
- -
- - Chat -
-
- -
- - Q&A -
-
- -
- - Search -
-
-
-
-
- ); -}; - -const NavLink: React.FC = ({ href, children, onClick }) => { - // Use the useRouter hook to get information about the current route - const pathname = usePathname(); - - // Determine if the current tab is active - const isActive = pathname === href; - - const handleClick = () => { - if (onClick) { - onClick(); // Call the onClick handler if provided - } - }; - - return ( - - {/* Add a class to highlight the active tab */} -
- {children} -
- - ); -}; +import useSWR from 'swr'; +import logo from '../../public/smart-retrieval-logo.webp'; +import { HeaderNavLink } from './ui/navlink'; +import { MobileMenu } from './ui/mobilemenu'; + +const MobileMenuItems = [ + { + href: '/', + icon: , + label: 'Home', + }, + { + href: '/about', + icon: , + label: 'About', + }, + { + href: '/chat', + icon: , + label: 'Chat', + }, + { + href: '/query', + icon: , + label: 'Q&A', + }, + { + href: '/search', + icon: , + label: 'Search', + }, +]; export default function Header() { const isLargeScreen = useMedia('(min-width: 1024px)', false); const [mounted, setMounted] = useState(false); const { theme, setTheme } = useTheme(); - // const [apiStatus, setApiStatus] = useState(false); // Use SWR for API status fetching const healthcheck_api = process.env.NEXT_PUBLIC_HEALTHCHECK_API; const { data: apiStatus, error: apiError } = useSWR(healthcheck_api, async (url) => { @@ -149,7 +56,7 @@ export default function Header() { const data = await response.json(); return data; } catch (error: any) { - console.error('Error fetching Backend API Status:', error.message); + console.error('Error fetching Backend API Status'); throw error; } }, { @@ -158,7 +65,14 @@ export default function Header() { refreshInterval: 60000, // Revalidate every 60 seconds }); if (apiError) { - console.error('[Header] Error fetching Backend API Status:', apiError.message); + if (apiError.name === 'AbortError') { + console.error('[Header] Error fetching Backend API Status: Request timed out'); + } + else { + console.error('[Header] Error fetching Backend API Status:', apiError.message); + } + } else { + console.log('[Header] API Status:', apiStatus); } useEffect(() => { @@ -222,43 +136,43 @@ export default function Header() {
{/* Mobile menu component */} - setMobileMenuOpen(false)} /> + setMobileMenuOpen(false)} logoSrc={logo} items={MobileMenuItems} />
- +
Home
-
- + +
About
-
- + +
Chat
-
- + +
Q&A
-
- + +
Search
-
+
{/* Status Page Button/Indicator */} API: - +
{apiError ? ( @@ -270,7 +184,7 @@ export default function Header() { )}
-
+ | {/* Toggle button with icon based on the theme */}
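For reference, the CORS setup changed in backend/backend/main.py above now reads its allowed origins from the environment instead of a hard-coded list. A minimal standalone sketch of that configuration, assuming a comma-separated ALLOWED_ORIGINS value as in backend/example.env (the second origin in the comment below is purely illustrative):

    import os

    from fastapi import FastAPI
    from fastapi.middleware.cors import CORSMiddleware

    app = FastAPI()

    # e.g. ALLOWED_ORIGINS=http://localhost:3000,https://smart-retrieval.example.com
    allowed_origins = os.getenv("ALLOWED_ORIGINS", "*")
    origins = allowed_origins.split(",") if allowed_origins != "*" else ["*"]

    app.add_middleware(
        CORSMiddleware,
        allow_origins=origins,
        allow_credentials=True,
        allow_methods=["GET", "POST"],
        allow_headers=["*"],
    )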