diff --git a/.gitignore b/.gitignore
index dc1be4609..76f7614a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 **/node_modules
 **/.svelte-kit
-**/package-lock.json
\ No newline at end of file
+**/package-lock.json
+
+__pycache__/
\ No newline at end of file
diff --git a/ChatQnA/README.md b/ChatQnA/README.md
index e7fbe1feb..985f7cd29 100644
--- a/ChatQnA/README.md
+++ b/ChatQnA/README.md
@@ -113,25 +113,31 @@ curl 127.0.0.1:9090/embed \
 
 Note: If you want to integrate the TEI service into the LangChain application, you'll need to restart the LangChain backend service after launching the TEI service.
 
-## Launch Redis and LangChain Backend Service
+## Launch Vector Database and LangChain Backend Service
 
 Update the `HUGGINGFACEHUB_API_TOKEN` environment variable with your huggingface token in the `docker-compose.yml`
 
+By default, Redis is used as the vector store. To use Qdrant, use the `docker-compose-qdrant.yml` file instead.
+
 ```bash
 cd langchain/docker
 docker compose -f docker-compose.yml up -d
+# To use Qdrant, run
+# docker compose -f docker-compose-qdrant.yml up -d
 cd ../../
 ```
 
 > [!NOTE]
 > If you modified any files and want that change introduced in this step, add `--build` to the end of the command to build the container image instead of pulling it from dockerhub.
 
-## Ingest data into Redis
+## Ingest Data Into Vector Database
 
-Each time the Redis container is launched, data should be ingested into the container using the commands:
+Each time the vector database container is launched, data should be ingested into the container using the commands:
 
 ```bash
 docker exec -it qna-rag-redis-server bash
+# To use Qdrant, run
+# docker exec -it qna-rag-qdrant-server bash
 cd /ws
 python ingest.py
 ```
diff --git a/ChatQnA/langchain/docker/docker-compose-qdrant.yml b/ChatQnA/langchain/docker/docker-compose-qdrant.yml
new file mode 100644
index 000000000..53c4f8ce1
--- /dev/null
+++ b/ChatQnA/langchain/docker/docker-compose-qdrant.yml
@@ -0,0 +1,45 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+services:
+  qdrant-vector-db:
+    image: qdrant/qdrant:v1.9.0
+    container_name: qdrant-vector-db
+    ports:
+      - "6333:6333"
+      - "6334:6334"
+  qna-rag-qdrant-server:
+    build:
+      args:
+        https_proxy: ${https_proxy}
+        http_proxy: ${http_proxy}
+      dockerfile: Dockerfile
+      context: .
+    image: intel/gen-ai-examples:qna-rag-qdrant-server
+    container_name: qna-rag-qdrant-server
+    environment:
+      - https_proxy=${https_proxy}
+      - HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+      - "EMBED_MODEL=BAAI/bge-base-en-v1.5"
+      - "VECTOR_DATABASE=QDRANT"
+      - "TGI_LLM_ENDPOINT=http://localhost:8080"
+      # - "TEI_ENDPOINT=http://xxx.xxx.xxx.xxx:9090" # To use a custom TEI endpoint
+    ulimits:
+      memlock:
+        soft: -1 # Set memlock to unlimited (no soft or hard limit)
+        hard: -1
+    volumes:
+      - ../qdrant:/ws
+      - ../test:/test
+    network_mode: "host"
diff --git a/ChatQnA/langchain/docker/docker-compose.yml b/ChatQnA/langchain/docker/docker-compose.yml
index 2dd666361..e12e7557d 100644
--- a/ChatQnA/langchain/docker/docker-compose.yml
+++ b/ChatQnA/langchain/docker/docker-compose.yml
@@ -43,6 +43,7 @@ services:
       - "REDIS_PORT=6379"
       - "EMBED_MODEL=BAAI/bge-base-en-v1.5"
      - "REDIS_SCHEMA=schema_dim_768.yml"
+      - "VECTOR_DATABASE=REDIS"
     ulimits:
       memlock:
         soft: -1 # Set memlock to unlimited (no soft or hard limit)
diff --git a/ChatQnA/langchain/docker/qna-app/app/server.py b/ChatQnA/langchain/docker/qna-app/app/server.py
index fff024077..203db7099 100644
--- a/ChatQnA/langchain/docker/qna-app/app/server.py
+++ b/ChatQnA/langchain/docker/qna-app/app/server.py
@@ -23,15 +23,14 @@
 from guardrails import moderation_prompt_for_chat, unsafe_dict
 from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
 from langchain_community.llms import HuggingFaceEndpoint
-from langchain_community.vectorstores import Redis
 from langchain_core.messages import HumanMessage
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 from langserve import add_routes
 from prompts import contextualize_q_prompt, prompt, qa_prompt
-from rag_redis.config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL
 from starlette.middleware.cors import CORSMiddleware
 from utils import (
+    VECTOR_DATABASE,
     create_kb_folder,
     create_retriever_from_files,
     create_retriever_from_links,
@@ -40,6 +39,11 @@
     reload_retriever,
 )
 
+if VECTOR_DATABASE == "REDIS":
+    from rag_redis.config import INDEX_NAME
+elif VECTOR_DATABASE == "QDRANT":
+    from rag_qdrant.config import COLLECTION_NAME as INDEX_NAME
+
 parser = argparse.ArgumentParser(description="Server Configuration")
 parser.add_argument("--chathistory", action="store_true", help="Enable debug mode")
 args = parser.parse_args()
@@ -52,7 +56,6 @@
 
 
 class RAGAPIRouter(APIRouter):
-
     def __init__(self, upload_dir, entrypoint, safety_guard_endpoint, tei_endpoint=None) -> None:
         super().__init__()
         self.upload_dir = upload_dir
@@ -93,15 +96,31 @@ def __init__(self, upload_dir, entrypoint, safety_guard_endpoint, tei_endpoint=N
             self.embeddings = HuggingFaceHubEmbeddings(model=tei_endpoint)
         else:
             # create embeddings using local embedding model
+            EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
             self.embeddings = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
 
-        rds = Redis.from_existing_index(
-            self.embeddings,
-            index_name=INDEX_NAME,
-            redis_url=REDIS_URL,
-            schema=INDEX_SCHEMA,
-        )
-        retriever = rds.as_retriever(search_type="mmr")
+        if VECTOR_DATABASE == "REDIS":
+            from langchain_community.vectorstores import Redis
+            from rag_redis.config import INDEX_SCHEMA, REDIS_URL
+
+            vdb = Redis.from_existing_index(
+                self.embeddings,
+                index_name=INDEX_NAME,
+                redis_url=REDIS_URL,
+                schema=INDEX_SCHEMA,
+            )
+        elif VECTOR_DATABASE == "QDRANT":
+            from langchain_community.vectorstores import Qdrant
+            from qdrant_client import QdrantClient
+            from rag_qdrant.config import QDRANT_HOST, QDRANT_PORT
+
+            client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
+            vdb = Qdrant(
+                embeddings=self.embeddings,
+                collection_name=INDEX_NAME,
+                client=client,
+            )
+        retriever = vdb.as_retriever(search_type="mmr")
 
         # Define contextualize chain
         self.contextualize_q_chain = contextualize_q_prompt | self.llm | StrOutputParser()
diff --git a/ChatQnA/langchain/docker/qna-app/app/utils.py b/ChatQnA/langchain/docker/qna-app/app/utils.py
index 276b6400c..295d0c359 100644
--- a/ChatQnA/langchain/docker/qna-app/app/utils.py
+++ b/ChatQnA/langchain/docker/qna-app/app/utils.py
@@ -28,9 +28,13 @@
 from bs4 import BeautifulSoup
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import UnstructuredFileLoader
-from langchain_community.vectorstores import Redis
 from langchain_core.documents import Document
-from rag_redis.config import INDEX_SCHEMA, REDIS_URL
+
+SUPPORTED_VECTOR_DATABASES = ["REDIS", "QDRANT"]
+
+VECTOR_DATABASE = str(os.getenv("VECTOR_DATABASE", "redis")).upper()
+
+assert VECTOR_DATABASE in SUPPORTED_VECTOR_DATABASES, f"Invalid VECTOR_DATABASE: {VECTOR_DATABASE}"
 
 
 def get_current_beijing_time():
@@ -57,7 +61,6 @@ def create_kb_folder(upload_dir):
 
 
 class Crawler:
-
     def __init__(self, pool=None):
         if pool:
             assert isinstance(pool, (str, list, tuple)), "url pool should be str, list or tuple"
@@ -292,16 +295,33 @@ def create_retriever_from_files(doc, embeddings, index_name: str):
     loader = UnstructuredFileLoader(doc, mode="single", strategy="fast")
     chunks = loader.load_and_split(text_splitter)
 
-    rds = Redis.from_texts(
-        texts=[chunk.page_content for chunk in chunks],
-        metadatas=[chunk.metadata for chunk in chunks],
-        embedding=embeddings,
-        index_name=index_name,
-        redis_url=REDIS_URL,
-        index_schema=INDEX_SCHEMA,
-    )
-
-    retriever = rds.as_retriever(search_type="mmr")
+    if VECTOR_DATABASE == "REDIS":
+        from langchain_community.vectorstores import Redis
+        from rag_redis.config import INDEX_SCHEMA, REDIS_URL
+
+        vdb = Redis.from_texts(
+            texts=[chunk.page_content for chunk in chunks],
+            metadatas=[chunk.metadata for chunk in chunks],
+            embedding=embeddings,
+            index_name=index_name,
+            redis_url=REDIS_URL,
+            index_schema=INDEX_SCHEMA,
+        )
+
+    elif VECTOR_DATABASE == "QDRANT":
+        from langchain_community.vectorstores import Qdrant
+        from rag_qdrant.config import COLLECTION_NAME, QDRANT_HOST, QDRANT_PORT
+
+        vdb = Qdrant.from_texts(
+            texts=[chunk.page_content for chunk in chunks],
+            metadatas=[chunk.metadata for chunk in chunks],
+            embedding=embeddings,
+            collection_name=COLLECTION_NAME,
+            host=QDRANT_HOST,
+            port=QDRANT_PORT,
+        )
+
+    retriever = vdb.as_retriever(search_type="mmr")
 
     return retriever
 
@@ -315,29 +335,63 @@ def create_retriever_from_links(embeddings, link_list: list, index_name):
         texts.append(data)
         metadatas.append(metadata)
 
-    rds = Redis.from_texts(
-        texts=texts,
-        metadatas=metadatas,
-        embedding=embeddings,
-        index_name=index_name,
-        redis_url=REDIS_URL,
-        index_schema=INDEX_SCHEMA,
-    )
-
-    retriever = rds.as_retriever(search_type="mmr")
+    if VECTOR_DATABASE == "REDIS":
+        from langchain_community.vectorstores import Redis
+        from rag_redis.config import INDEX_SCHEMA, REDIS_URL
+
+        vdb = Redis.from_texts(
+            texts=texts,
+            metadatas=metadatas,
+            embedding=embeddings,
+            index_name=index_name,
+            redis_url=REDIS_URL,
+            index_schema=INDEX_SCHEMA,
+        )
+
+    elif VECTOR_DATABASE == "QDRANT":
+        from langchain_community.vectorstores import Qdrant
+        from rag_qdrant.config import COLLECTION_NAME, QDRANT_HOST, QDRANT_PORT
+
+        vdb = Qdrant.from_texts(
+            texts=texts,
+            metadatas=metadatas,
+            embedding=embeddings,
+            collection_name=COLLECTION_NAME,
+            host=QDRANT_HOST,
+            port=QDRANT_PORT,
+        )
+
+    retriever = vdb.as_retriever(search_type="mmr")
 
     return retriever
 
 
 def reload_retriever(embeddings, index_name):
     print(f"[rag - reload retriever] reload with index: {index_name}")
-    rds = Redis.from_existing_index(
-        embeddings,
-        index_name=index_name,
-        redis_url=REDIS_URL,
-        schema=INDEX_SCHEMA,
-    )
-
-    retriever = rds.as_retriever(search_type="mmr")
+
+    if VECTOR_DATABASE == "REDIS":
+        from langchain_community.vectorstores import Redis
+        from rag_redis.config import INDEX_SCHEMA, REDIS_URL
+
+        vdb = Redis.from_existing_index(
+            embeddings,
+            index_name=index_name,
+            redis_url=REDIS_URL,
+            schema=INDEX_SCHEMA,
+        )
+
+    elif VECTOR_DATABASE == "QDRANT":
+        from langchain_community.vectorstores import Qdrant
+        from qdrant_client import QdrantClient
+        from rag_qdrant.config import COLLECTION_NAME, QDRANT_HOST, QDRANT_PORT
+
+        client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
+        vdb = Qdrant(
+            embeddings=embeddings,
+            collection_name=COLLECTION_NAME,
+            client=client,
+        )
+
+    retriever = vdb.as_retriever(search_type="mmr")
 
     return retriever
diff --git a/ChatQnA/langchain/docker/requirements.txt b/ChatQnA/langchain/docker/requirements.txt
index 4a687e66b..472cbed0a 100644
--- a/ChatQnA/langchain/docker/requirements.txt
+++ b/ChatQnA/langchain/docker/requirements.txt
@@ -12,6 +12,7 @@ poetry
 pyarrow
 pydantic==1.10.13
 pymupdf
+qdrant-client==1.9.0
 redis
 sentence-transformers
 unstructured
diff --git a/ChatQnA/langchain/qdrant/LICENSE b/ChatQnA/langchain/qdrant/LICENSE
new file mode 100644
index 000000000..426b65090
--- /dev/null
+++ b/ChatQnA/langchain/qdrant/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 LangChain, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
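The `VECTOR_DATABASE` switch above selects the backend with imports deferred into each branch. As a rough sketch (not part of this diff), the Qdrant branch can be exercised in isolation, assuming a Qdrant server on `localhost:6333` and an already-populated `rag-qdrant` collection:

```python
# Sketch only: mirrors the VECTOR_DATABASE == "QDRANT" branch in utils.py.
# Assumes Qdrant is reachable on localhost:6333 and that the "rag-qdrant"
# collection already exists (e.g. created by ingest.py below).
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient

embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en-v1.5")
client = QdrantClient(host="localhost", port=6333)
vdb = Qdrant(embeddings=embeddings, collection_name="rag-qdrant", client=client)
retriever = vdb.as_retriever(search_type="mmr")
print(retriever.get_relevant_documents("What was Nike's revenue in 2023?"))
```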
diff --git a/ChatQnA/langchain/qdrant/data/nke-10k-2023.pdf b/ChatQnA/langchain/qdrant/data/nke-10k-2023.pdf
new file mode 100644
index 000000000..6ade8863e
Binary files /dev/null and b/ChatQnA/langchain/qdrant/data/nke-10k-2023.pdf differ
diff --git a/ChatQnA/langchain/qdrant/ingest.py b/ChatQnA/langchain/qdrant/ingest.py
new file mode 100644
index 000000000..c0b34301f
--- /dev/null
+++ b/ChatQnA/langchain/qdrant/ingest.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import io
+import os
+
+import numpy as np
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings
+from langchain_community.vectorstores import Qdrant
+from PIL import Image
+from rag_qdrant.config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT, TEI_EMBEDDING_ENDPOINT
+
+
+def pdf_loader(file_path):
+    try:
+        import easyocr
+        import fitz
+    except ImportError:
+        raise ImportError(
+            "`pymupdf` or `easyocr` package not found, please install with "
+            "`pip install pymupdf easyocr`."
+        )
+
+    doc = fitz.open(file_path)
+    reader = easyocr.Reader(["en"])
+    result = ""
+    for i in range(doc.page_count):
+        page = doc.load_page(i)
+        pagetext = page.get_text().strip()
+        if pagetext:
+            result = result + pagetext
+        if len(doc.get_page_images(i)) > 0:
+            for img in doc.get_page_images(i):
+                if img:
+                    pageimg = ""
+                    xref = img[0]
+                    img_data = doc.extract_image(xref)
+                    img_bytes = img_data["image"]
+                    pil_image = Image.open(io.BytesIO(img_bytes))
+                    img = np.array(pil_image)
+                    img_result = reader.readtext(img, paragraph=True, detail=0)
+                    pageimg = pageimg + ", ".join(img_result).strip()
+                    if pageimg.endswith("!") or pageimg.endswith("?") or pageimg.endswith("."):
+                        pass
+                    else:
+                        pageimg = pageimg + "."
+                result = result + pageimg
+    return result
+
+
+def ingest_documents():
+    """Ingest PDF to Qdrant from the data/ directory that
+    contains Edgar 10k filings data for Nike."""
+    # Load list of pdfs
+    company_name = "Nike"
+    data_path = "data/"
+    doc_path = [os.path.join(data_path, file) for file in os.listdir(data_path)][0]
+
+    print("Parsing 10k filing doc for NIKE", doc_path)
+
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100, add_start_index=True)
+    content = pdf_loader(doc_path)
+    chunks = text_splitter.split_text(content)
+
+    print("Done preprocessing. Created ", len(chunks), " chunks of the original pdf")
+    # Create vectorstore
+    if TEI_EMBEDDING_ENDPOINT:
+        # create embeddings using TEI endpoint service
+        embedder = HuggingFaceHubEmbeddings(model=TEI_EMBEDDING_ENDPOINT)
+    else:
+        # create embeddings using local embedding model
+        embedder = HuggingFaceBgeEmbeddings(model_name=EMBED_MODEL)
+
+    # Batch size
+    batch_size = 32
+    num_chunks = len(chunks)
+    for i in range(0, num_chunks, batch_size):
+        batch_chunks = chunks[i : i + batch_size]
+        batch_texts = [f"Company: {company_name}. " + chunk for chunk in batch_chunks]
+
+        _ = Qdrant.from_texts(
+            texts=batch_texts,
+            embedding=embedder,
+            collection_name=COLLECTION_NAME,
+            host=QDRANT_HOST,
+            port=QDRANT_PORT,
+        )
+        print(f"Processed batch {i//batch_size + 1}/{(num_chunks-1)//batch_size + 1}")
+
+
+if __name__ == "__main__":
+    ingest_documents()
diff --git a/ChatQnA/langchain/qdrant/rag_qdrant.ipynb b/ChatQnA/langchain/qdrant/rag_qdrant.ipynb
new file mode 100644
index 000000000..d43113a33
--- /dev/null
+++ b/ChatQnA/langchain/qdrant/rag_qdrant.ipynb
@@ -0,0 +1,94 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "fe1adb29",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "681a5d1e",
+   "metadata": {},
+   "source": [
+    "## Connect to RAG App\n",
+    "\n",
+    "Assuming you are already running this server:\n",
+    "```bash\n",
+    "langserve start\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "d774be2a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Nike's revenue in 2023 was $51.2 billion. \n",
+      "\n",
+      "Source: 'data/nke-10k-2023.pdf', Start Index: '146100'\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langserve.client import RemoteRunnable\n",
+    "\n",
+    "rag_qdrant = RemoteRunnable(\"http://localhost:8000/rag-qdrant\")\n",
+    "\n",
+    "print(rag_qdrant.invoke(\"What was Nike's revenue in 2023?\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "07ae0005",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "As of May 31, 2023, Nike had approximately 83,700 employees worldwide. This information can be found in the first piece of context provided. (source: data/nke-10k-2023.pdf, start_index: 32532)\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(rag_qdrant.invoke(\"How many employees work at Nike?\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4a6b9f00",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/ChatQnA/langchain/qdrant/rag_qdrant/__init__.py b/ChatQnA/langchain/qdrant/rag_qdrant/__init__.py
new file mode 100644
index 000000000..28f108cb6
--- /dev/null
+++ b/ChatQnA/langchain/qdrant/rag_qdrant/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
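After `ingest.py` runs, the collection can be sanity-checked directly with `qdrant-client`. A minimal sketch, assuming the defaults from `rag_qdrant/config.py`:

```python
# Sketch only: verify that ingest.py actually populated the collection.
# Assumes the default host/port/collection from rag_qdrant/config.py.
from qdrant_client import QdrantClient

client = QdrantClient(host="localhost", port=6333)
info = client.get_collection("rag-qdrant")
print("points in collection:", info.points_count)
```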
diff --git a/ChatQnA/langchain/qdrant/rag_qdrant/chain.py b/ChatQnA/langchain/qdrant/rag_qdrant/chain.py
new file mode 100644
index 000000000..4ffb375ad
--- /dev/null
+++ b/ChatQnA/langchain/qdrant/rag_qdrant/chain.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.llms import HuggingFaceEndpoint
+from langchain_community.vectorstores import Qdrant
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.pydantic_v1 import BaseModel
+from langchain_core.runnables import RunnableParallel, RunnablePassthrough
+from qdrant_client import QdrantClient
+from rag_qdrant.config import COLLECTION_NAME, EMBED_MODEL, QDRANT_HOST, QDRANT_PORT, TGI_LLM_ENDPOINT
+
+
+# Make this look better in the docs.
+class Question(BaseModel):
+    __root__: str
+
+
+# Init Embeddings
+embedder = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
+
+# Connect to pre-loaded vectorstore
+# run the ingest.py script to populate this
+
+client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
+vectorstore = Qdrant(embeddings=embedder, collection_name=COLLECTION_NAME, client=client)
+
+# TODO allow user to change parameters
+retriever = vectorstore.as_retriever(search_type="mmr")
+
+# Define our prompt
+template = """
+Use the following pieces of context from retrieved
+dataset to answer the question. Do not make up an answer if there is no
+context provided to help answer it. Include the 'source' and 'start_index'
+from the metadata included in the context you used to answer the question
+
+Context:
+---------
+{context}
+
+---------
+Question: {question}
+---------
+
+Answer:
+"""
+
+prompt = ChatPromptTemplate.from_template(template)
+
+# RAG Chain
+model = HuggingFaceEndpoint(
+    endpoint_url=TGI_LLM_ENDPOINT,
+    max_new_tokens=512,
+    top_k=10,
+    top_p=0.95,
+    typical_p=0.95,
+    temperature=0.01,
+    repetition_penalty=1.03,
+    streaming=True,
+    truncate=1024,
+)
+
+chain = (
+    RunnableParallel({"context": retriever, "question": RunnablePassthrough()}) | prompt | model | StrOutputParser()
+).with_types(input_type=Question)
diff --git a/ChatQnA/langchain/qdrant/rag_qdrant/config.py b/ChatQnA/langchain/qdrant/rag_qdrant/config.py
new file mode 100644
index 000000000..c6301dfb9
--- /dev/null
+++ b/ChatQnA/langchain/qdrant/rag_qdrant/config.py
@@ -0,0 +1,28 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+# Embedding model
+EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+
+# Qdrant configuration
+QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost")
+QDRANT_PORT = int(os.getenv("QDRANT_PORT", 6333))
+COLLECTION_NAME = os.getenv("COLLECTION_NAME", "rag-qdrant")
+
+# LLM/Embedding endpoints
+TGI_LLM_ENDPOINT = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080")
+TGI_LLM_ENDPOINT_NO_RAG = os.getenv("TGI_LLM_ENDPOINT_NO_RAG", "http://localhost:8081")
+TEI_EMBEDDING_ENDPOINT = os.getenv("TEI_ENDPOINT")
diff --git a/README.md b/README.md
index 30e189202..649d54ffd 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,15 @@ All the examples are well-validated on Intel platforms. In addition, these examp
 			<td>Gaudi2</td>
 			<td>Chatbot</td>
 		</tr>
+		<tr>
+			<td>LangChain</td>
+			<td>Mistral-7B</td>
+			<td>BGE-Base</td>
+			<td>Qdrant</td>
+			<td>TGI-Habana</td>
+			<td>Gaudi2</td>
+			<td>Chatbot</td>
+		</tr>
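With the diff applied, the chain in `rag_qdrant/chain.py` can be smoke-tested without LangServe by invoking it directly. A sketch, assuming a populated collection and a TGI endpoint reachable at `TGI_LLM_ENDPOINT`:

```python
# Sketch only: direct invocation of the rag-qdrant chain, bypassing LangServe.
# Assumes ingest.py has been run and TGI_LLM_ENDPOINT is reachable.
from rag_qdrant.chain import chain

print(chain.invoke("What was Nike's revenue in 2023?"))
```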