Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactored code and fixed bugs #31

Merged
merged 1 commit into from
May 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions backend/backend/app/api/routers/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pydantic import BaseModel

from backend.app.utils import auth
from backend.app.utils.contants import MEMORY_TOKEN_LIMIT
from backend.app.utils.index import get_index
from backend.app.utils.json import json_to_model

Expand Down Expand Up @@ -110,7 +111,7 @@ async def chat(

memory = ChatMemoryBuffer.from_defaults(
chat_history=messages,
token_limit=4096,
token_limit=MEMORY_TOKEN_LIMIT,
)

logger.info(f"Memory: {memory.get()}")
Expand All @@ -121,14 +122,15 @@ async def chat(
memory=memory,
context_prompt=(
"You are a helpful chatbot, able to have normal interactions, as well as answer questions"
" regarding information relating to the Public Sector Standard Conditions Of Contract (PSSCOC) Documents and JTC's Employer Information Requirements (EIR) Documents.\n"
"All the documents are in the context of the construction industry in Singapore.\n"
" regarding information relating but not limited to the Public Sector Standard Conditions Of Contract (PSSCOC) Documents and JTC's Employer Information Requirements (EIR) Documents.\n"
"PSSCOC and EIR documents are in the context of the construction industry in Singapore.\n"
"Here are the relevant documents for the context:\n"
"{context_str}"
"\nInstruction: Based on the above documents, provide a detailed answer for the user question below.\n"
"If you cannot answer the question or are unsure of how to answer, inform the user that you do not know.\n"
"If you need to clarify the question, ask the user for clarification.\n"
"You are to provide the relevant sources of which you got the information from in the context in brackets."
"You are to provide the relevant sources including but not limited to the file name, and page of which you got the information from in the context in brackets.\n"
"Should there be a full file path, remove the file path and only include the file name in the context."
),
)
response = chat_engine.stream_chat(
Expand Down
10 changes: 9 additions & 1 deletion backend/backend/app/utils/contants.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
########################################################################
# Model Constants for the backend app #
########################################################################
import os
from pathlib import Path

from torch.cuda import is_available as is_cuda_available

# ENV variables
USE_LOCAL_LLM = bool(os.getenv("USE_LOCAL_LLM").lower() == "true")
USE_LOCAL_VECTOR_STORE = bool(os.getenv("USE_LOCAL_VECTOR_STORE").lower() == "true")

# Model Constants
MAX_NEW_TOKENS = 4096
CONTEXT_SIZE = 3900 # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
Expand Down Expand Up @@ -34,7 +39,10 @@
DEF_EMBED_MODEL_DIMENSIONS = (
1536 # Default embedding model dimensions used by OpenAI text-embedding-ada-002
)
EMBED_BATCH_SIZE = 10 # batch size for openai embeddings
EMBED_BATCH_SIZE = 64 # batch size for openai embeddings

# Chat Memory Buffer Constants
MEMORY_TOKEN_LIMIT = 1500 if USE_LOCAL_LLM else 6144

# Prompt Helper Constants
# set maximum input size
Expand Down
6 changes: 2 additions & 4 deletions backend/backend/app/utils/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
MODEL_KWARGS,
NUM_OUTPUT,
STORAGE_DIR,
USE_LOCAL_LLM,
USE_LOCAL_VECTOR_STORE,
)

# from llama_index.vector_stores.supabase import SupabaseVectorStore
Expand All @@ -49,10 +51,6 @@
load_dotenv()
logger = logging.getLogger("uvicorn")

# ENV variables
USE_LOCAL_LLM = bool(os.getenv("USE_LOCAL_LLM").lower() == "true")
USE_LOCAL_VECTOR_STORE = bool(os.getenv("USE_LOCAL_VECTOR_STORE").lower() == "true")


# use local LLM if USE_LOCAL_LLM is set to True, else use openai's API
if USE_LOCAL_LLM:
Expand Down
14 changes: 7 additions & 7 deletions frontend/app/api/admin/collections/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,23 @@ export async function PUT(request: NextRequest) {
const { collection_id, is_public } = await request.json();

// Update the collection data in the database
const { data, error } = await supabase
const { data: updateData, error: updateError } = await supabase
.from('collections')
.update({ is_public: is_public })
.match({ collection_id });
.eq('collection_id', collection_id);

if (error) {
console.error('Error updating collection data in database:', error.message);
return NextResponse.json({ error: error.message }, { status: 500 });
if (updateError) {
console.error('Error updating collection data in database:', updateError.message);
return NextResponse.json({ error: updateError.message }, { status: 500 });
}

// console.log('Updated collection:', data);

// Delete the collection requests data in the database (Since it is manually updated by Admin)
const { data: delData, error: delError } = await supabase
.from('collection_requests')
.from('collections_requests')
.delete()
.match({ collection_id });
.eq('collection_id', collection_id);

if (delError) {
console.error('Error deleting collection requests data in database:', delError.message);
Expand Down
40 changes: 24 additions & 16 deletions frontend/app/api/user/collections/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,15 @@ export async function DELETE(request: NextRequest) {
authorization = null; // Clear the authorization token
}

// Create default delete_vecs variable
let is_delete_vecs = true;
// Retrieve the collection_id and optional delete_vecs flag from the request body
const { collection_id } = await request?.json();
const { collection_id, delete_vecs } = await request?.json();

// if delete_vecs is not undefined, take its value
if (delete_vecs !== undefined) {
is_delete_vecs = delete_vecs;
}

// Retrieve the user's ID from the session token
const { data: sessionData, error: sessionError } = await supabaseAuth
Expand All @@ -148,23 +155,24 @@ export async function DELETE(request: NextRequest) {
return NextResponse.json({ error: sessionError.message }, { status: 500 });
}

// Delete the vector collection from the vecs schema via POST request to Backend API
const deleteVecsResponse = await fetch(`${process.env.DELETE_SINGLE_COLLECTION_API}?collection_id=${collection_id}`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': authorization,
'X-API-Key': api_key,
} as any,
body: JSON.stringify({ collection_id: collection_id }),
});

if (!deleteVecsResponse.ok) {
console.error('Error deleting', collection_id, 'from vecs schema:', deleteVecsResponse.statusText);
return NextResponse.json({ error: deleteVecsResponse.statusText }, { status: deleteVecsResponse.status });
if (is_delete_vecs === true) {
// Delete the vector collection from the vecs schema via POST request to Backend API
const deleteVecsResponse = await fetch(`${process.env.DELETE_SINGLE_COLLECTION_API}?collection_id=${collection_id}`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': authorization,
'X-API-Key': api_key,
} as any,
body: JSON.stringify({ collection_id: collection_id }),
});

if (!deleteVecsResponse.ok) {
console.error('Error deleting', collection_id, 'from vecs schema:', deleteVecsResponse.statusText);
return NextResponse.json({ error: deleteVecsResponse.statusText }, { status: deleteVecsResponse.status });
}
}


// Delete the collection data from the database
const { data: deleteData, error: deleteError } = await supabase
.from('collections')
Expand Down
3 changes: 3 additions & 0 deletions frontend/app/components/chat-section.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import { ChatSelection } from "@/app/components/ui/chat";
import { AutofillQuestion } from "@/app/components/ui/autofill-prompt";
import { useSession } from "next-auth/react";
import { useState } from "react";
import { ToastContainer } from 'react-toastify';
import 'react-toastify/dist/ReactToastify.css';

export default function ChatSection() {
const { data: session } = useSession();
Expand Down Expand Up @@ -34,6 +36,7 @@ export default function ChatSection() {

return (
<div className="space-y-4 max-w-5xl w-full relative">
<ToastContainer />
{collSelectedId ?
(
<>
Expand Down
3 changes: 3 additions & 0 deletions frontend/app/components/search-section.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import { useState, ChangeEvent, FormEvent } from "react";
import { AutofillSearchQuery } from "@/app/components/ui/autofill-prompt";
import { SearchSelection, useSearch, SearchResults, SearchInput } from "./ui/search";
import { ToastContainer } from 'react-toastify';
import 'react-toastify/dist/ReactToastify.css';

const SearchSection: React.FC = () => {
const [query, setQuery] = useState("");
Expand All @@ -25,6 +27,7 @@ const SearchSection: React.FC = () => {

return (
<div className="space-y-4 max-w-5xl w-full">
<ToastContainer />
{collSelectedId ? (
<>
<SearchInput
Expand Down
4 changes: 2 additions & 2 deletions frontend/app/components/ui/admin/admin-manage-collections.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ export default function AdminManageCollections() {
icon: 'success',
confirmButtonColor: '#4caf50',
});
// Remove approved request from the list
setCollectionsData(collectionsData.filter((collection) => collection.collection_id !== collectionId));
// Refresh the collections data
fetchCollections();
}).catch((error) => {
console.error('Error setting collection Public:', error);
// Show error dialog
Expand Down
3 changes: 0 additions & 3 deletions frontend/app/components/ui/chat/chat-message.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,11 @@ import ChatAvatar from "@/app/components/ui/chat/chat-avatar";
import { Message } from "@/app/components/ui/chat/chat.interface";
import Markdown from "@/app/components/ui/chat/markdown";
import { useCopyToClipboard } from "@/app/components/ui/chat/use-copy-to-clipboard";
import { ToastContainer } from 'react-toastify';
import 'react-toastify/dist/ReactToastify.css';

export default function ChatMessage(chatMessage: Message) {
const { isCopied, copyToClipboard } = useCopyToClipboard({ timeout: 2000 });
return (
<div className="flex items-start gap-4 pr-5 pt-5">
<ToastContainer />
<ChatAvatar role={chatMessage.role} />
<div className="group flex flex-1 justify-between gap-2">
<div className="flex-1">
Expand Down
72 changes: 63 additions & 9 deletions frontend/app/components/ui/query/query-document-upload.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"use client";

import { useState, useRef } from 'react';
import { useState } from 'react';
import { toast } from 'react-toastify';
import Swal from 'sweetalert2';
import { AlertTriangle } from "lucide-react";
Expand All @@ -19,12 +19,20 @@ export default function QueryDocumentUpload() {
const indexerApi = process.env.NEXT_PUBLIC_INDEXER_API;
const { data: session } = useSession();
const supabaseAccessToken = session?.supabaseAccessToken;
const [createdCollectionId, setCreatedCollectionId] = useState<string>('');
// NOTE: allowedTypes is an array of allowed MIME types for file uploads
// The allowedTypesString is a string of allowed file extensions for the file input
// Both must be kept in sync to ensure that the file input only accepts the allowed file types
const allowedTypes = ['application/pdf', 'application/msword', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'application/vnd.ms-excel', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'text/plain', 'application/json'];
const allowedTypesString = ".pdf,.doc,.docx,.xls,xlsx,.txt,.json";

const MAX_FILES = 10; // Maximum number of files allowed
const MAX_TOTAL_SIZE = 15 * 1024 * 1024; // Maximum total size allowed (15 MB in bytes)
const MAX_FILES = 15; // Maximum number of files allowed
const MAX_TOTAL_SIZE_MB = 60; // Maximum total size allowed in MB (60 MB)
const MAX_TOTAL_SIZE = MAX_TOTAL_SIZE_MB * 1024 * 1024; // Maximum total size allowed in bytes (60 MB in bytes)
// The total size of all selected files should not exceed this value

const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
setFileError(false);
const selectedFiles = event.target.files;
if (selectedFiles) {
const fileList = Array.from(selectedFiles);
Expand All @@ -50,24 +58,23 @@ export default function QueryDocumentUpload() {
// Check if the total size exceeds the maximum allowed
if (totalSize > MAX_TOTAL_SIZE) {
// Show toast notification
toast.error(`Total size of selected files exceeds the maximum allowed (${MAX_TOTAL_SIZE} bytes).`, {
toast.error(`Total size of selected files exceeds the maximum allowed (${MAX_TOTAL_SIZE_MB} MB).`, {
position: "top-right",
});
setFileError(true);
setFileErrorMsg(`Total size of selected files exceeds the maximum allowed (${MAX_TOTAL_SIZE} bytes).`);
setFileErrorMsg(`Total size of selected files exceeds the maximum allowed (${MAX_TOTAL_SIZE_MB} MB).`);
return;
}

// Check if the file types are allowed
const allowedTypes = ['application/pdf', 'application/msword', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'application/vnd.ms-excel', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'text/plain'];
const invalidFiles = fileList.filter(file => !allowedTypes.includes(file.type));
if (invalidFiles.length) {
// Show toast notification
toast.error(`Invalid file type(s) selected!`, {
position: "top-right",
});
setFileError(true);
setFileErrorMsg(`Invalid file type(s) selected!`);
setFileErrorMsg(`Only ${allowedTypesString} file type(s) allowed!`);
return;
}

Expand Down Expand Up @@ -145,6 +152,7 @@ export default function QueryDocumentUpload() {
// Get the response data
const data = await response.json();
console.log('Insert New Collection Results:', data);
setCreatedCollectionId(data.collectionId);
// Show success dialog
Swal.fire({
title: 'Success!',
Expand All @@ -157,7 +165,7 @@ export default function QueryDocumentUpload() {
// Create a new FormData object
const formData = new FormData();
// Append the collection_id to the FormData object
formData.append('collection_id', data.collectionId);
formData.append('collection_id', createdCollectionId);
// Append each file to the FormData object
files.forEach((file, index) => {
formData.append('files', file);
Expand Down Expand Up @@ -204,6 +212,28 @@ export default function QueryDocumentUpload() {
closeButton: true,
isLoading: false
});
// Delete the previously inserted collection from the database
fetch('/api/user/collections', {
method: 'DELETE',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
collection_id: createdCollectionId,
delete_vecs: false,
}),
})
.then(async response => {
if (response.ok) {
// Get the response data
const data = await response.json();
console.log('Delete Collection Results:', data);
} else {
const data = await response.json();
// Log to console
console.error('Error deleting collection:', data.error);
}
});
}
})
.catch(error => {
Expand All @@ -218,6 +248,28 @@ export default function QueryDocumentUpload() {
closeButton: true,
isLoading: false
});
// Delete the previously inserted collection from the database
fetch('/api/user/collections', {
method: 'DELETE',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
collection_id: createdCollectionId,
delete_vecs: false,
}),
})
.then(async response => {
if (response.ok) {
// Get the response data
const data = await response.json();
console.log('Delete Collection Results:', data);
} else {
const data = await response.json();
// Log to console
console.error('Error deleting collection:', data.error);
}
});
});
} else {
const data = await response.json();
Expand All @@ -244,6 +296,8 @@ export default function QueryDocumentUpload() {
});
setisLoading(false);
});
// Reset createdCollectionId state
setCreatedCollectionId('');
}
else {
setisLoading(false);
Expand Down Expand Up @@ -296,7 +350,7 @@ export default function QueryDocumentUpload() {
id="fileUpload"
title='Select Files'
multiple
accept=".pdf,.doc,.docx,.xls,xlsx,.txt"
accept={allowedTypesString}
onChange={handleFileChange}
className={`h-12 rounded-lg w-full bg-gray-300 dark:bg-zinc-700/65 border px-2 py-2 ${fileError ? 'border-red-500' : ''}`}
/>
Expand Down
Loading
Loading