diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 2451abbdc8..5c0df5b5ff 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -15,7 +15,7 @@ /comps/prompt_registry/ hoong.tee.yeoh@intel.com /comps/feedback_management/ hoong.tee.yeoh@intel.com /comps/chathistory/ yogesh.pandey@intel.com -/comps/texttosql/ yogesh.pandey@intel.com +/comps/text2sql/ yogesh.pandey@intel.com /comps/text2image/ xinyu.ye@intel.com /comps/reranks/ kaokao.lv@intel.com /comps/retrievers/ kaokao.lv@intel.com diff --git a/.github/workflows/docker/compose/texttosql-compose.yaml b/.github/workflows/docker/compose/text2sql-compose.yaml similarity index 55% rename from .github/workflows/docker/compose/texttosql-compose.yaml rename to .github/workflows/docker/compose/text2sql-compose.yaml index 56c7b41fb7..263790a61a 100644 --- a/.github/workflows/docker/compose/texttosql-compose.yaml +++ b/.github/workflows/docker/compose/text2sql-compose.yaml @@ -3,7 +3,7 @@ # this file should be run in the root of the repo services: - texttosql: + text2sql: build: - dockerfile: comps/texttosql/langchain/Dockerfile - image: ${REGISTRY:-opea}/texttosql:${TAG:-latest} + dockerfile: comps/text2sql/src/Dockerfile + image: ${REGISTRY:-opea}/text2sql:${TAG:-latest} diff --git a/comps/cores/mega/constants.py b/comps/cores/mega/constants.py index e657ba6f45..c90aff0df9 100644 --- a/comps/cores/mega/constants.py +++ b/comps/cores/mega/constants.py @@ -33,6 +33,7 @@ class ServiceType(Enum): TEXT2IMAGE = 16 ANIMATION = 17 IMAGE2IMAGE = 18 + TEXT2SQL = 19 class MegaServiceEndpoint(Enum): diff --git a/comps/text2sql/deployment/docker_compose/README.md b/comps/text2sql/deployment/docker_compose/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/texttosql/langchain/docker_compose_texttosql.yaml b/comps/text2sql/deployment/docker_compose/langchain.yaml similarity index 91% rename from comps/texttosql/langchain/docker_compose_texttosql.yaml rename to 
comps/text2sql/deployment/docker_compose/langchain.yaml index e482002792..5430472bf4 100644 --- a/comps/texttosql/langchain/docker_compose_texttosql.yaml +++ b/comps/text2sql/deployment/docker_compose/langchain.yaml @@ -32,9 +32,9 @@ services: volumes: - ./chinook.sql:/docker-entrypoint-initdb.d/chinook.sql - texttosql_service: - image: opea/texttosql:latest - container_name: texttosql_service + text2sql_service: + image: opea/text2sql:latest + container_name: text2sql_service ports: - "9090:8090" environment: diff --git a/comps/text2sql/deployment/kubernetes/README.md b/comps/text2sql/deployment/kubernetes/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/comps/texttosql/langchain/Dockerfile b/comps/text2sql/src/Dockerfile similarity index 70% rename from comps/texttosql/langchain/Dockerfile rename to comps/text2sql/src/Dockerfile index c1bab90364..18cd5bfa1f 100644 --- a/comps/texttosql/langchain/Dockerfile +++ b/comps/text2sql/src/Dockerfile @@ -21,13 +21,13 @@ COPY comps /home/user/comps RUN pip install --no-cache-dir --upgrade pip setuptools && \ if [ ${ARCH} = "cpu" ]; then \ - pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/texttosql/langchain/requirements.txt; \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/text2sql/src/requirements.txt; \ else \ - pip install --no-cache-dir -r /home/user/comps/texttosql/langchain/requirements.txt; \ + pip install --no-cache-dir -r /home/user/comps/text2sql/src/requirements.txt; \ fi ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/texttosql/langchain/ +WORKDIR /home/user/comps/text2sql/src/ -ENTRYPOINT ["python", "main.py"] +ENTRYPOINT ["python", "opea_text2sql_microservice.py"] diff --git a/comps/text2sql/src/README.md b/comps/text2sql/src/README.md new file mode 100644 index 0000000000..3d9b34c77a --- /dev/null +++ b/comps/text2sql/src/README.md @@ -0,0 +1,154 @@ +# 🛢 
Text-to-SQL Microservice + +In today's data-driven world, the ability to efficiently extract insights from databases is crucial. However, querying databases often requires specialized knowledge of SQL (Structured Query Language) and database schemas, which can be a barrier for non-technical users. This is where the Text-to-SQL microservice comes into play, leveraging the power of LLMs and agentic frameworks to bridge the gap between human language and database queries. This microservice is built on the LangChain/LangGraph frameworks. + +The microservice enables a wide range of use cases, making it a versatile tool for businesses, researchers, and individuals alike. Users can generate queries based on natural language questions, enabling them to quickly retrieve relevant data from their databases. Additionally, the service can be integrated into ChatBots, allowing for natural language interactions and providing accurate responses based on the underlying data. Furthermore, it can be utilized to build custom dashboards, enabling users to visualize and analyze insights based on their specific requirements, all through the power of natural language. + +--- + +## 🛠️ Features + +**Implement SQL Query based on input text**: Transform user-provided natural language into SQL queries, subsequently executing them to retrieve data from SQL databases. + +--- + +## ⚙️ Implementation + +The Text-to-SQL microservice can be implemented with various frameworks and supports various types of SQL databases. + +### 🔗 Utilizing Text-to-SQL with LangChain framework + +The following guide provides set-up instructions and comprehensive details regarding the Text-to-SQL microservice with LangChain. In this configuration, we will employ PostgresDB as our example database to showcase this microservice. 
+ +--- + +#### 🚀 Start Microservice with Python(Option 1) + +#### Install Requirements + +```bash +pip install -r requirements.txt +``` + +#### Start PostgresDB Service + +We will use [Chinook](https://github.com/lerocha/chinook-database) sample database as a default to test the Text-to-SQL microservice. Chinook database is a sample database ideal for demos and testing ORM tools targeting single and multiple database servers. + +```bash +export POSTGRES_USER=postgres +export POSTGRES_PASSWORD=testpwd +export POSTGRES_DB=chinook + +cd comps/text2sql + +docker run --name postgres-db --ipc=host -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5442:5432 -d -v ./chinook.sql:/docker-entrypoint-initdb.d/chinook.sql postgres:latest +``` + +#### Start TGI Service + +```bash +export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" +export TGI_PORT=8008 + +docker run -d --name="text2sql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${LLM_MODEL_ID} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $LLM_MODEL_ID +``` + +#### Verify the TGI Service + +```bash +export your_ip=$(hostname -I | awk '{print $1}') +curl http://${your_ip}:${TGI_PORT}/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' +``` + +#### Setup Environment Variables + +```bash +export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}" +``` + +#### Start Text-to-SQL Microservice with Python Script + +Start Text-to-SQL microservice with below command. 
+ +```bash +python3 opea_text2sql_microservice.py +``` + +--- + +### 🚀 Start Microservice with Docker (Option 2) + +#### Start PostGreSQL Database Service + +Please refer to section [Start PostgresDB Service](#start-postgresdb-service) + +#### Start TGI Service + +Please refer to section [Start TGI Service](#start-tgi-service) + +#### Setup Environment Variables + +```bash +export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}" +``` + +#### Build Docker Image + +```bash +cd GenAIComps/ +docker build -t opea/text2sql:latest -f comps/text2sql/src/Dockerfile . +``` + +#### Run Docker with CLI (Option A) + +```bash +export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}" + +docker run --runtime=runc --name="comps-langchain-text2sql" -p 9090:8080 --ipc=host -e llm_endpoint_url=${TGI_LLM_ENDPOINT} opea/text2sql:latest +``` + +#### Run via docker compose (Option B) + +- Setup Environment Variables. + + ```bash + export TGI_LLM_ENDPOINT=http://${your_ip}:${TGI_PORT} + export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} + export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" + export POSTGRES_USER=postgres + export POSTGRES_PASSWORD=testpwd + export POSTGRES_DB=chinook + ``` + +- Start the services. + + ```bash + docker compose -f docker_compose_text2sql.yaml up + ``` + +--- + +### ✅ Invoke the microservice. 
+ +The Text-to-SQL microservice exposes the following API endpoints: + +- Test Database Connection + + ```bash + curl --location http://${your_ip}:9090/v1/postgres/health \ + --header 'Content-Type: application/json' \ + --data '{"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${your_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}' + ``` + +- Execute SQL Query from input text + + ```bash + curl http://${your_ip}:9090/v1/text2sql\ + -X POST \ + -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${your_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}}' \ + -H 'Content-Type: application/json' + ``` diff --git a/comps/text2sql/src/__init__.py b/comps/text2sql/src/__init__.py new file mode 100644 index 0000000000..916f3a44b2 --- /dev/null +++ b/comps/text2sql/src/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/texttosql/langchain/chinook.sql b/comps/text2sql/src/chinook.sql similarity index 100% rename from comps/texttosql/langchain/chinook.sql rename to comps/text2sql/src/chinook.sql diff --git a/comps/text2sql/src/integrations/__init__.py b/comps/text2sql/src/integrations/__init__.py new file mode 100644 index 0000000000..916f3a44b2 --- /dev/null +++ b/comps/text2sql/src/integrations/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/comps/text2sql/src/integrations/opea.py b/comps/text2sql/src/integrations/opea.py new file mode 100644 index 0000000000..e69ab752df --- /dev/null +++ b/comps/text2sql/src/integrations/opea.py @@ -0,0 +1,122 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import os +from typing import Annotated, Optional + +from langchain.agents.agent_types import AgentType +from 
langchain_community.utilities.sql_database import SQLDatabase +from langchain_huggingface import HuggingFaceEndpoint +from pydantic import BaseModel, Field +from sqlalchemy import create_engine +from sqlalchemy.exc import SQLAlchemyError + +from comps import CustomLogger, OpeaComponent, ServiceType +from comps.text2sql.src.integrations.sql_agent import CustomSQLDatabaseToolkit, custom_create_sql_agent + +logger = CustomLogger("comps-text2sql") +logflag = os.getenv("LOGFLAG", False) + +sql_params = { + "max_string_length": 3600, +} + +generation_params = { + "max_new_tokens": 1024, + "top_k": 10, + "top_p": 0.95, + "temperature": 0.01, + "repetition_penalty": 1.03, + "streaming": True, +} + +TGI_LLM_ENDPOINT = os.environ.get("TGI_LLM_ENDPOINT") + +llm = HuggingFaceEndpoint( + endpoint_url=TGI_LLM_ENDPOINT, + task="text-generation", + **generation_params, +) + + +class PostgresConnection(BaseModel): + user: Annotated[str, Field(min_length=1)] + password: Annotated[str, Field(min_length=1)] + host: Annotated[str, Field(min_length=1)] + port: Annotated[int, Field(ge=1, le=65535)] # Default PostgreSQL port with constraints + database: Annotated[str, Field(min_length=1)] + + def connection_string(self) -> str: + return f"postgresql://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}" + + def test_connection(self) -> bool: + """Test the connection to the PostgreSQL database.""" + connection_string = self.connection_string() + try: + engine = create_engine(connection_string) + with engine.connect() as _: + # If the connection is successful, return True + return True + except SQLAlchemyError as e: + print(f"Connection failed: {e}") + return False + + +class Input(BaseModel): + input_text: str + conn_str: Optional[PostgresConnection] = None + + +class OpeaText2SQL(OpeaComponent): + """A specialized text to sql component derived from OpeaComponent for interacting with TGI services and Database. 
+ + Attributes: + client: An instance of the client for text to sql generation and execution. + """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.TEXT2SQL.name.lower(), description, config) + + async def check_health(self) -> bool: + """Checks the health of the TGI service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + try: + response = llm.generate(["Hello, how are you?"]) + return True + except Exception as e: + return False + + async def invoke(self, input: Input): + url = input.conn_str.connection_string() + """Execute a SQL query using the custom SQL agent. + + Args: + input (str): The user's input. + url (str): The URL of the database to connect to. + + Returns: + dict: The result of the SQL execution. + """ + db = SQLDatabase.from_uri(url, **sql_params) + logger.info("Starting Agent") + agent_executor = custom_create_sql_agent( + llm=llm, + verbose=True, + toolkit=CustomSQLDatabaseToolkit(llm=llm, db=db), + agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION, + agent_executor_kwargs={"return_intermediate_steps": True}, + ) + + result = await agent_executor.ainvoke(input) + + query = [] + for log, _ in result["intermediate_steps"]: + if log.tool == "sql_db_query": + query.append(log.tool_input) + result["sql"] = query[0].replace("Observation", "") + return result diff --git a/comps/texttosql/langchain/texttosql.py b/comps/text2sql/src/integrations/sql_agent.py similarity index 87% rename from comps/texttosql/langchain/texttosql.py rename to comps/text2sql/src/integrations/sql_agent.py index eb8f6dae41..a78638c568 100644 --- a/comps/texttosql/langchain/texttosql.py +++ b/comps/text2sql/src/integrations/sql_agent.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations @@ -8,7 +8,6 @@ from langchain.agents import create_react_agent from 
langchain.agents.agent import AgentExecutor, RunnableAgent -from langchain.agents.agent_types import AgentType from langchain.agents.mrkl import prompt as react_prompt from langchain.chains.llm import LLMChain from langchain_community.agent_toolkits.sql.prompt import SQL_PREFIX, SQL_SUFFIX @@ -21,37 +20,21 @@ from langchain_core.prompts import BasePromptTemplate, PromptTemplate from langchain_core.pydantic_v1 import BaseModel, Field, root_validator from langchain_core.tools import BaseTool -from langchain_huggingface import HuggingFaceEndpoint from sqlalchemy.engine import Result from comps import CustomLogger -generation_params = { - "max_new_tokens": 1024, - "top_k": 10, - "top_p": 0.95, - "temperature": 0.01, - "repetition_penalty": 1.03, - "streaming": True, -} - - -TGI_LLM_ENDPOINT = os.environ.get("TGI_LLM_ENDPOINT") - -llm = HuggingFaceEndpoint( - endpoint_url=TGI_LLM_ENDPOINT, - task="text-generation", - **generation_params, -) - -sql_params = { - "max_string_length": 3600, -} - -logger = CustomLogger("comps-texttosql") +logger = CustomLogger("comps-text2sql") logflag = os.getenv("LOGFLAG", False) -# https://github.com/langchain-ai/langchain/issues/23585 + +def remove_quotes(s): + if s.startswith('"') and s.endswith('"'): + return s[1:-1] + elif s.startswith("'") and s.endswith("'"): + return s[1:-1] + else: + return s class BaseSQLDatabaseTool(BaseModel): @@ -86,10 +69,15 @@ def _run( """Execute the query, return the results or an error message.""" logger.info("query: {}".format(query)) query = query.replace("\nObservation", "") + query = remove_quotes(query) result = self.db.run_no_throw(query) return result +class _ListSQLDataBaseToolInput(BaseModel): + tool_input: str = Field("", description="An empty string") + + class _InfoSQLDatabaseToolInput(BaseModel): table_names: str = Field( ..., @@ -117,10 +105,6 @@ def _run( return self.db.get_table_info_no_throw([t.strip() for t in table_names.split(",")]) -class _ListSQLDataBaseToolInput(BaseModel): - 
tool_input: str = Field("", description="An empty string") - - class CustomListSQLDatabaseTool(BaseSQLDatabaseTool, BaseTool): """Tool for getting tables names.""" @@ -306,33 +290,3 @@ def custom_create_sql_agent( handle_parsing_errors=True, **(agent_executor_kwargs or {}), ) - - -def execute(input, url): - """Execute a SQL query using the custom SQL agent. - - Args: - input (str): The user's input. - url (str): The URL of the database to connect to. - - Returns: - dict: The result of the SQL execution. - """ - db = SQLDatabase.from_uri(url, **sql_params) - logger.info("Starting Agent") - agent_executor = custom_create_sql_agent( - llm=llm, - verbose=True, - toolkit=CustomSQLDatabaseToolkit(llm=llm, db=db), - agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION, - agent_executor_kwargs={"return_intermediate_steps": True}, - ) - - result = agent_executor.invoke(input) - - query = [] - for log, _ in result["intermediate_steps"]: - if log.tool == "sql_db_query": - query.append(log.tool_input) - result["sql"] = query[0].replace("Observation", "") - return result diff --git a/comps/text2sql/src/opea_text2sql_microservice.py b/comps/text2sql/src/opea_text2sql_microservice.py new file mode 100644 index 0000000000..3e4d4bb5ae --- /dev/null +++ b/comps/text2sql/src/opea_text2sql_microservice.py @@ -0,0 +1,67 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import pathlib +import sys + +from fastapi.exceptions import HTTPException + +from comps import CustomLogger, OpeaComponentController, opea_microservices, register_microservice +from comps.text2sql.src.integrations.opea import Input, OpeaText2SQL + +cur_path = pathlib.Path(__file__).parent.resolve() +comps_path = os.path.join(cur_path, "../../../") +sys.path.append(comps_path) + +logger = CustomLogger("text2sql") +logflag = os.getenv("LOGFLAG", False) + +try: + # Initialize OpeaComponentController + controller = OpeaComponentController() + + # Register components + text2sql_agent 
= OpeaText2SQL( + name="Text2SQL", + description="Text2SQL Service", + ) + + # Register components with the controller + controller.register(text2sql_agent) + + # Discover and activate a healthy component + controller.discover_and_activate() +except Exception as e: + logger.error(f"Failed to initialize components: {e}") + + +@register_microservice( + name="opea_service@text2sql", + endpoint="/v1/text2sql", + host="0.0.0.0", + port=8080, +) +async def execute_agent(input: Input): + """Execute a SQL query from the input text. + + This function takes an Input object containing the input text and database connection information. + It uses the execute function from the text2sql module to execute the SQL query and returns the result. + + Args: + input (Input): An Input object with the input text and database connection information. + + Returns: + dict: A dictionary with a 'result' key containing the output of the executed SQL query. + """ + if input.conn_str.test_connection(): + response = await controller.invoke(input) + # response = "a" + return {"result": response} + else: + raise HTTPException(status_code=500, detail="Failed to connect to PostgreSQL database") + + +if __name__ == "__main__": + logger.info("OPEA Text2SQL Microservice is starting...") + opea_microservices["opea_service@text2sql"].start() diff --git a/comps/texttosql/langchain/requirements.txt b/comps/text2sql/src/requirements.txt similarity index 100% rename from comps/texttosql/langchain/requirements.txt rename to comps/text2sql/src/requirements.txt diff --git a/comps/texttosql/README.md b/comps/texttosql/README.md deleted file mode 100644 index 5c2f31ce23..0000000000 --- a/comps/texttosql/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# 🛢 Text-to-SQL Microservice - -In today's data-driven world, the ability to efficiently extract insights from databases is crucial. 
However, querying databases often requires specialized knowledge of SQL(Structured Query Language) and database schemas, which can be a barrier for non-technical users. This is where the Text-to-SQL microservice comes into play, leveraging the power of LLMs and agentic frameworks to bridge the gap between human language and database queries. This microservice is built on LangChain/LangGraph frameworks. - -The microservice enables a wide range of use cases, making it a versatile tool for businesses, researchers, and individuals alike. Users can generate queries based on natural language questions, enabling them to quickly retrieve relevant data from their databases. Additionally, the service can be integrated into ChatBots, allowing for natural language interactions and providing accurate responses based on the underlying data. Furthermore, it can be utilized to build custom dashboards, enabling users to visualize and analyze insights based on their specific requirements, all through the power of natural language. - ---- - -## 🛠️ Features - -**Implement SQL Query based on input text**: Transform user-provided natural language into SQL queries, subsequently executing them to retrieve data from SQL databases. - ---- - -## ⚙️ Implementation - -The text-to-sql microservice able to implement with various framework and support various types of SQL databases. - -### 🔗 Utilizing Text-to-SQL with Langchain framework - -For additional information, please refer to this [README](./langchain/README.md) diff --git a/comps/texttosql/langchain/README.md b/comps/texttosql/langchain/README.md deleted file mode 100644 index 128d030fdb..0000000000 --- a/comps/texttosql/langchain/README.md +++ /dev/null @@ -1,136 +0,0 @@ -# 🛢🔗 Text-to-SQL Microservice with Langchain - -This README provides set-up instructions and comprehensive details regarding the Text-to-SQL microservices via LangChain. 
In this configuration, we will employ PostgresDB as our example database to showcase this microservice. - ---- - -## 🚀 Start Microservice with Python(Option 1) - -### Install Requirements - -```bash -pip install -r requirements.txt -``` - -### Start PostgresDB Service - -We will use [Chinook](https://github.com/lerocha/chinook-database) sample database as a default to test the Text-to-SQL microservice. Chinook database is a sample database ideal for demos and testing ORM tools targeting single and multiple database servers. - -```bash -export POSTGRES_USER=postgres -export POSTGRES_PASSWORD=testpwd -export POSTGRES_DB=chinook - -cd comps/texttosql/langchain - -docker run --name postgres-db --ipc=host -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5442:5432 -d -v ./chinook.sql:/docker-entrypoint-initdb.d/chinook.sql postgres:latest -``` - -### Start TGI Service - -```bash -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" -export TGI_PORT=8008 - -docker run -d --name="texttosql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${LLM_MODEL_ID} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $LLM_MODEL_ID -``` - -### Verify the TGI Service - -```bash -export your_ip=$(hostname -I | awk '{print $1}') -curl http://${your_ip}:${TGI_PORT}/generate \ - -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ - -H 'Content-Type: application/json' -``` - -### Setup Environment Variables - -```bash -export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}" -``` - -### Start Text-to-SQL Microservice with Python Script - -Start Text-to-SQL microservice with below command. 
- -```bash -python3 main.py -``` - ---- - -## 🚀 Start Microservice with Docker (Option 2) - -### Start PostGreSQL Database Service - -Please refer to section [Start PostgresDB Service](#start-postgresdb-service) - -### Start TGI Service - -Please refer to section [Start TGI Service](#start-tgi-service) - -### Setup Environment Variables - -```bash -export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}" -``` - -### Build Docker Image - -```bash -cd GenAIComps/ -docker build -t opea/texttosql:latest -f comps/texttosql/langchain/Dockerfile . -``` - -#### Run Docker with CLI (Option A) - -```bash -export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}" - -docker run --runtime=runc --name="comps-langchain-texttosql" -p 9090:8080 --ipc=host -e llm_endpoint_url=${TGI_LLM_ENDPOINT} opea/texttosql:latest -``` - -#### Run via docker compose (Option B) - -- Setup Environment Variables. - - ```bash - export TGI_LLM_ENDPOINT=http://${your_ip}:${TGI_PORT} - export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" - export POSTGRES_USER=postgres - export POSTGRES_PASSWORD=testpwd - export POSTGRES_DB=chinook - ``` - -- Start the services. - - ```bash - docker compose -f docker_compose_texttosql.yaml up - ``` - ---- - -## ✅ Invoke the microservice. 
- -The Text-to-SQL microservice exposes the following API endpoints: - -- Test Database Connection - - ```bash - curl --location http://${your_ip}:9090/v1/postgres/health \ - --header 'Content-Type: application/json' \ - --data '{"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${your_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}' - ``` - -- Execute SQL Query from input text - - ```bash - curl http://${your_ip}:9090/v1/texttosql\ - -X POST \ - -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${your_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}}' \ - -H 'Content-Type: application/json' - ``` diff --git a/comps/texttosql/langchain/main.py b/comps/texttosql/langchain/main.py deleted file mode 100644 index 80284b30fc..0000000000 --- a/comps/texttosql/langchain/main.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -import pathlib -import sys -from typing import Annotated, Optional - -from fastapi.exceptions import HTTPException -from pydantic import BaseModel, Field -from sqlalchemy import create_engine -from sqlalchemy.exc import SQLAlchemyError -from texttosql import execute - -from comps import opea_microservices, register_microservice - -cur_path = pathlib.Path(__file__).parent.resolve() -comps_path = os.path.join(cur_path, "../../../") -sys.path.append(comps_path) - - -class PostgresConnection(BaseModel): - user: Annotated[str, Field(min_length=1)] - password: Annotated[str, Field(min_length=1)] - host: Annotated[str, Field(min_length=1)] - port: Annotated[int, Field(ge=1, le=65535)] # Default PostgreSQL port with constraints - database: Annotated[str, Field(min_length=1)] - - def connection_string(self) -> str: - return f"postgresql://{self.user}:{self.password}@{self.host}:{self.port}/{self.database}" - - def test_connection(self) -> bool: - """Test 
the connection to the PostgreSQL database.""" - connection_string = self.connection_string() - try: - engine = create_engine(connection_string) - with engine.connect() as _: - # If the connection is successful, return True - return True - except SQLAlchemyError as e: - print(f"Connection failed: {e}") - return False - - -class Input(BaseModel): - input_text: str - conn_str: Optional[PostgresConnection] = None - - -@register_microservice( - name="opea_service@texttosql", - endpoint="/v1/postgres/health", - host="0.0.0.0", - port=8090, -) -def test_connection(input: PostgresConnection): - """Test the connection to a PostgreSQL database. - - This function is used as an OPEA microservice to test whether a PostgreSQL - connection can be established successfully. - - Args: - input (PostgresConnection): A PostgresConnection object containing the database credentials. - This argument is required for this function. - - Returns: - dict: A dictionary with a 'status' key indicating the outcome of the test. The value of 'status' - will be either 'success' or 'failed'. If 'status' is 'failed', the message attribute - contains an error message. - """ - # Test the database connection - result = input.test_connection() - if not result: - raise HTTPException(status_code=500, detail="Failed to connect to PostgreSQL database") - else: - return {"status": "success", "message": "Connected successfully to PostgreSQL database"} - - -@register_microservice( - name="opea_service@texttosql", - endpoint="/v1/texttosql", - host="0.0.0.0", - port=8090, -) -def execute_agent(input: Input): - """Execute a SQL query from the input text. - - This function takes an Input object containing the input text and database connection information. - It uses the execute function from the texttosql module to execute the SQL query and returns the result. - - Args: - input (Input): An Input object with the input text and database connection information. 
- - Returns: - dict: A dictionary with a 'result' key containing the output of the executed SQL query. - """ - url = input.conn_str.connection_string() - if input.conn_str.test_connection(): - result = execute(input.input_text, url) - return {"result": result} - else: - raise HTTPException(status_code=500, detail="Failed to connect to PostgreSQL database") - - -if __name__ == "__main__": - opea_microservices["opea_service@texttosql"].start()
diff --git a/tests/text2sql/test_text2sql_opea.sh b/tests/text2sql/test_text2sql_opea.sh
new file mode 100644
index 0000000000..2ee0615766
--- /dev/null
+++ b/tests/text2sql/test_text2sql_opea.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# End-to-end test for the text2sql microservice: builds the image, starts
+# PostgreSQL, TGI and the service in Docker, then sends one query over HTTP.
+
+set -x
+
+WORKPATH=$(dirname "$PWD")
+LOG_PATH="$WORKPATH/tests"
+ip_address=$(hostname -I | awk '{print $1}')
+tgi_port=8080
+tgi_volume=$WORKPATH/data
+
+export model="mistralai/Mistral-7B-Instruct-v0.3"
+export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export POSTGRES_USER=postgres
+export POSTGRES_PASSWORD=testpwd
+export POSTGRES_DB=chinook
+
+# Build the opea/text2sql:comps image with $WORKPATH as the build context.
+function build_docker_images() {
+    # Without `set -e` a failed cd would silently build from the wrong directory.
+    cd "$WORKPATH" || exit 1
+    docker build --no-cache -t opea/text2sql:comps -f comps/text2sql/src/Dockerfile .
+}
+
+# Poll the URL given as $1 every 10 s until curl succeeds; return 1 after 1200 s.
+function check_tgi_connection() {
+    local url=$1
+    local timeout=1200
+    local interval=10
+    local start_time current_time elapsed_time
+
+    start_time=$(date +%s)
+
+    while true; do
+        if curl --silent --head --fail "$url" > /dev/null; then
+            echo "Success"
+            return 0
+        fi
+        echo
+        current_time=$(date +%s)
+        elapsed_time=$((current_time - start_time))
+
+        if [ "$elapsed_time" -ge "$timeout" ]; then
+            echo "Timeout,$((timeout / 60))min can't connect $url"
+            return 1
+        fi
+        echo "Waiting for service for $elapsed_time seconds"
+        sleep "$interval"
+    done
+}
+
+# Start PostgreSQL (seeded with chinook.sql), the TGI endpoint and the
+# text2sql server, then wait for the TGI /health endpoint to respond.
+function start_service() {
+
+    docker run --name test-text2sql-postgres --ipc=host -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5442:5432 -d -v $WORKPATH/comps/text2sql/src/chinook.sql:/docker-entrypoint-initdb.d/chinook.sql postgres:latest
+
+    docker run -d --name="test-text2sql-tgi-endpoint" --ipc=host -p $tgi_port:80 -v $tgi_volume:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $model
+
+    export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_port}"
+    text2sql_port=9090
+    # http_proxy is unset first, so the server container receives it empty.
+    unset http_proxy
+    docker run -d --name="test-text2sql-server" --ipc=host -p ${text2sql_port}:8080 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT opea/text2sql:comps
+
+    # check whether tgi is fully ready; abort rather than validate against a dead endpoint
+    if ! check_tgi_connection "${TGI_LLM_ENDPOINT}/health"; then
+        docker logs test-text2sql-tgi-endpoint > ${LOG_PATH}/tgi.log
+        exit 1
+    fi
+}
+
+# POST one natural-language question to /v1/text2sql and require the response
+# to contain "output"; otherwise dump both container logs and exit 1.
+function validate_microservice() {
+    text2sql_port=9090
+    result=$(http_proxy="" curl http://${ip_address}:${text2sql_port}/v1/text2sql\
+        -X POST \
+        -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \
+        -H 'Content-Type: application/json')
+
+    if [[ $result == *"output"* ]]; then
+        echo "$result"
+        echo "Result correct."
+    else
+        echo "Result wrong. Received was $result"
+        docker logs test-text2sql-server > ${LOG_PATH}/text2sql.log
+        docker logs test-text2sql-tgi-endpoint > ${LOG_PATH}/tgi.log
+        exit 1
+    fi
+
+}
+
+# Stop and remove every container selected by the name filter test-text2sql*.
+function stop_docker() {
+    cid=$(docker ps -aq --filter "name=test-text2sql*")
+    if [[ -n "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+}
+
+# Run the full cycle: clean up leftovers, build, start, validate, clean up again.
+function main() {
+
+    stop_docker
+
+    build_docker_images
+    start_service
+
+    validate_microservice
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main
diff --git a/tests/texttosql/test_texttosql_langchain.sh b/tests/texttosql/test_texttosql_langchain.sh deleted file mode 100755 index 2f4f01c58f..0000000000 --- a/tests/texttosql/test_texttosql_langchain.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -xe - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') -tgi_port=8080 -tgi_volume=$WORKPATH/data - -export model="mistralai/Mistral-7B-Instruct-v0.3" -export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export POSTGRES_USER=postgres -export POSTGRES_PASSWORD=testpwd -export POSTGRES_DB=chinook - -function build_docker_images() { - cd $WORKPATH - docker build --no-cache -t opea/texttosql:comps -f comps/texttosql/langchain/Dockerfile .
-} - -function start_service() { - - docker run --name test-texttosql-postgres --ipc=host -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5442:5432 -d -v $WORKPATH/comps/texttosql/langchain/chinook.sql:/docker-entrypoint-initdb.d/chinook.sql postgres:latest - - docker run -d --name="test-texttosql-tgi-endpoint" --ipc=host -p $tgi_port:80 -v ./data:/data --shm-size 1g -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HF_TOKEN} -e model=${model} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $model - - export TGI_LLM_ENDPOINT="http://${ip_address}:${tgi_port}" - texttosql_port=9090 - unset http_proxy - docker run -d --name="test-texttosql-server" --ipc=host -p ${texttosql_port}:8090 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TGI_LLM_ENDPOINT=$TGI_LLM_ENDPOINT opea/texttosql:comps - - # check whether tgi is fully ready - n=0 - until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do - docker logs test-texttosql-tgi-endpoint > ${LOG_PATH}/tgi.log - n=$((n+1)) - if grep -q Connected ${LOG_PATH}/tgi.log; then - break - fi - sleep 5s - done - sleep 5s -} - -function validate_microservice() { - texttosql_port=9090 - result=$(http_proxy="" curl http://${ip_address}:${texttosql_port}/v1/texttosql\ - -X POST \ - -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \ - -H 'Content-Type: application/json') - - if [[ $result == *"output"* ]]; then - echo $result - echo "Result correct." - else - echo "Result wrong. Received was $result" - docker logs test-texttosql-server > ${LOG_PATH}/texttosql.log - docker logs test-texttosql-tgi-endpoint > ${LOG_PATH}/tgi.log - exit 1 - fi - -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-texttosql*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main