From c612a7f333740a300e383dadce011434842e94c8 Mon Sep 17 00:00:00 2001
From: "Hoong Tee, Yeoh"
Date: Tue, 1 Oct 2024 11:30:36 +0800
Subject: [PATCH] [text-to-sql]: Refine and minor code refactor (#755)

* Minor code refactor

Remove unused imports and unused variables, and enhance code docstrings.

Signed-off-by: Yeoh, Hoong Tee

* text-to-sql: Refine documentation

Signed-off-by: Yeoh, Hoong Tee

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [text-to-sql]: Restructure README documentation

Signed-off-by: Yeoh, Hoong Tee

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Yeoh, Hoong Tee
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 comps/texttosql/README.md              |  21 +++
 comps/texttosql/langchain/README.md    | 102 +++++++++++++------------
 comps/texttosql/langchain/main.py      |   6 +-
 comps/texttosql/langchain/texttosql.py |  13 ++--
 4 files changed, 83 insertions(+), 59 deletions(-)
 create mode 100644 comps/texttosql/README.md

diff --git a/comps/texttosql/README.md b/comps/texttosql/README.md
new file mode 100644
index 000000000..5c2f31ce2
--- /dev/null
+++ b/comps/texttosql/README.md
@@ -0,0 +1,21 @@
+# 🛢 Text-to-SQL Microservice
+
+In today's data-driven world, the ability to efficiently extract insights from databases is crucial. However, querying databases often requires specialized knowledge of SQL (Structured Query Language) and database schemas, which can be a barrier for non-technical users. This is where the Text-to-SQL microservice comes into play, leveraging the power of LLMs and agentic frameworks to bridge the gap between human language and database queries. This microservice is built on the LangChain/LangGraph frameworks.
+
+The microservice enables a wide range of use cases, making it a versatile tool for businesses, researchers, and individuals alike. Users can generate queries based on natural language questions, enabling them to quickly retrieve relevant data from their databases. Additionally, the service can be integrated into chatbots, allowing for natural language interactions and providing accurate responses based on the underlying data. Furthermore, it can be utilized to build custom dashboards, enabling users to visualize and analyze insights based on their specific requirements, all through the power of natural language.
+
+---
+
+## 🛠️ Features
+
+**Implement SQL Query based on input text**: Transform user-provided natural language into SQL queries, subsequently executing them to retrieve data from SQL databases.
+
+---
+
+## ⚙️ Implementation
+
+The Text-to-SQL microservice can be implemented with various frameworks and supports various types of SQL databases.
+
+### 🔗 Utilizing Text-to-SQL with the LangChain framework
+
+For additional information, please refer to this [README](./langchain/README.md)
diff --git a/comps/texttosql/langchain/README.md b/comps/texttosql/langchain/README.md
index 24316800c..128d030fd 100644
--- a/comps/texttosql/langchain/README.md
+++ b/comps/texttosql/langchain/README.md
@@ -1,18 +1,18 @@
-# Text-to-SQL Microservice
+# 🛢🔗 Text-to-SQL Microservice with LangChain
 
-In today's data-driven world, the ability to efficiently extract insights from databases is crucial. However, querying databases often requires specialized knowledge of SQL and database schemas, which can be a barrier for non-technical users. This is where the Text-to-SQL microservice comes into play, leveraging the power of LLMs and agentic frameworks to bridge the gap between human language and database queries. This microservice is built on Langchain/Langgraph frameworks.
+This README provides set-up instructions and comprehensive details regarding the Text-to-SQL microservice via LangChain. In this configuration, we will employ PostgresDB as our example database to showcase this microservice.
 
-The microservice enables a wide range of use cases, making it a versatile tool for businesses, researchers, and individuals alike. Users can generate queries based on natural language questions, enabling them to quickly retrieve relevant data from their databases. Additionally, the service can be integrated into chatbots, allowing for natural language interactions and providing accurate responses based on the underlying data. Furthermore, it can be utilized to build custom dashboards, enabling users to visualize and analyze insights based on their specific requirements, all through the power of natural language.
+---
 
-## 🚀1. Start Microservice with Python(Option 1)
+## 🚀 Start Microservice with Python (Option 1)
 
-### 1.1 Install Requirements
+### Install Requirements
 
 ```bash
 pip install -r requirements.txt
 ```
 
-### 1.2 Start PostgresDB Service
+### Start PostgresDB Service
 
 We will use [Chinook](https://github.com/lerocha/chinook-database) sample database as a default to test the Text-to-SQL microservice. Chinook database is a sample database ideal for demos and testing ORM tools targeting single and multiple database servers.
 
@@ -26,7 +26,7 @@ cd comps/texttosql/langchain
 docker run --name postgres-db --ipc=host -e POSTGRES_USER=${POSTGRES_USER} -e POSTGRES_HOST_AUTH_METHOD=trust -e POSTGRES_DB=${POSTGRES_DB} -e POSTGRES_PASSWORD=${POSTGRES_PASSWORD} -p 5442:5432 -d -v ./chinook.sql:/docker-entrypoint-initdb.d/chinook.sql postgres:latest
 ```
 
-### 1.3 Start TGI Service
+### Start TGI Service
 
 ```bash
 export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
@@ -36,10 +36,9 @@ export TGI_PORT=8008
 docker run -d --name="texttosql-tgi-endpoint" --ipc=host -p $TGI_PORT:80 -v ./data:/data --shm-size 1g -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e model=${LLM_MODEL_ID} ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $LLM_MODEL_ID
 ```
 
-### 1.4 Verify the TGI Service
+### Verify the TGI Service
 
 ```bash
-
 export your_ip=$(hostname -I | awk '{print $1}')
 curl http://${your_ip}:${TGI_PORT}/generate \
   -X POST \
   -H 'Content-Type: application/json'
 ```
 
-### 1.5 Setup Environment Variables
+### Setup Environment Variables
 
 ```bash
 export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}"
 ```
 
-### 1.6 Start Text-to-SQL Microservice with Python Script
+### Start Text-to-SQL Microservice with Python Script
 
 Start Text-to-SQL microservice with below command.
 
@@ -61,74 +60,77 @@ Start Text-to-SQL microservice with below command.
 python3 main.py
 ```
 
-## 🚀2. Start Microservice with Docker (Option 2)
+---
+
+## 🚀 Start Microservice with Docker (Option 2)
 
-### 2.1 Start PostGreSQL Database Service
+### Start PostgreSQL Database Service
 
-Please refer to 1.2.
+Please refer to section [Start PostgresDB Service](#start-postgresdb-service)
 
-### 2.2 Start TGI Service
+### Start TGI Service
 
-Please refer to 1.3.
+Please refer to section [Start TGI Service](#start-tgi-service)
 
-### 2.3 Setup Environment Variables
+### Setup Environment Variables
 
 ```bash
 export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}"
 ```
 
-### 2.4 Build Docker Image
+### Build Docker Image
 
 ```bash
-cd GenAIComps/ # back to GenAIComps/ folder
+cd GenAIComps/
 docker build -t opea/texttosql:latest -f comps/texttosql/langchain/Dockerfile .
 ```
 
-#### 2.5 Run Docker with CLI (Option A)
+#### Run Docker with CLI (Option A)
 
 ```bash
 export TGI_LLM_ENDPOINT="http://${your_ip}:${TGI_PORT}"
 docker run --runtime=runc --name="comps-langchain-texttosql" -p 9090:8080 --ipc=host -e llm_endpoint_url=${TGI_LLM_ENDPOINT} opea/texttosql:latest
-
 ```
 
-#### 2.6 Run via docker compose. (Option B)
+#### Run via Docker Compose (Option B)
 
-Set Environment Variables.
+- Setup Environment Variables.
 
-```bash
-export TGI_LLM_ENDPOINT=http://${your_ip}:${TGI_PORT}
-export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
-export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
-export POSTGRES_USER=postgres
-export POSTGRES_PASSWORD=testpwd
-export POSTGRES_DB=chinook
-```
+  ```bash
+  export TGI_LLM_ENDPOINT=http://${your_ip}:${TGI_PORT}
+  export HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+  export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"
+  export POSTGRES_USER=postgres
+  export POSTGRES_PASSWORD=testpwd
+  export POSTGRES_DB=chinook
+  ```
 
-Start the services.
+- Start the services.
 
-```bash
-docker compose -f docker_compose_texttosql.yaml up
-```
+  ```bash
+  docker compose -f docker_compose_texttosql.yaml up
+  ```
 
-## 🚀3. Consume Microservice
+---
 
-Once Text-to-SQL microservice is started, user can use below command
+## ✅ Invoke the Microservice
 
-- Test the Database connection
+The Text-to-SQL microservice exposes the following API endpoints:
 
-```bash
-curl --location http://${your_ip}:9090/v1/postgres/health \
---header 'Content-Type: application/json' \
---data '{"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${your_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}'
-```
+- Test Database Connection
 
-- Invoke the microservice.
+  ```bash
+  curl --location http://${your_ip}:9090/v1/postgres/health \
+  --header 'Content-Type: application/json' \
+  --data '{"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${your_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}'
+  ```
 
-```bash
-curl http://${your_ip}:9090/v1/texttosql\
- -X POST \
- -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${your_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}}' \
- -H 'Content-Type: application/json'
-```
+- Execute SQL Query from input text
+
+  ```bash
+  curl http://${your_ip}:9090/v1/texttosql \
+   -X POST \
+   -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${your_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}}' \
+   -H 'Content-Type: application/json'
+  ```
diff --git a/comps/texttosql/langchain/main.py b/comps/texttosql/langchain/main.py
index 13bb75ef4..80284b30f 100644
--- a/comps/texttosql/langchain/main.py
+++ b/comps/texttosql/langchain/main.py
@@ -12,12 +12,12 @@
 from sqlalchemy.exc import SQLAlchemyError
 from texttosql import execute
 
+from comps import opea_microservices, register_microservice
+
 cur_path = pathlib.Path(__file__).parent.resolve()
 comps_path = os.path.join(cur_path, "../../../")
 sys.path.append(comps_path)
 
-from comps import opea_microservices, register_microservice
-
 
 class PostgresConnection(BaseModel):
     user: Annotated[str, Field(min_length=1)]
@@ -34,7 +34,7 @@ def test_connection(self) -> bool:
         connection_string = self.connection_string()
         try:
             engine = create_engine(connection_string)
-            with engine.connect() as connection:
+            with engine.connect() as _:
                 # If the connection is successful, return True
                 return True
         except SQLAlchemyError as e:
diff --git a/comps/texttosql/langchain/texttosql.py b/comps/texttosql/langchain/texttosql.py
index 26dcbc468..eb8f6dae4 100644
--- a/comps/texttosql/langchain/texttosql.py
+++ b/comps/texttosql/langchain/texttosql.py
@@ -9,6 +9,8 @@
 from langchain.agents import create_react_agent
 from langchain.agents.agent import AgentExecutor, RunnableAgent
 from langchain.agents.agent_types import AgentType
+from langchain.agents.mrkl import prompt as react_prompt
+from langchain.chains.llm import LLMChain
 from langchain_community.agent_toolkits.sql.prompt import SQL_PREFIX, SQL_SUFFIX
 from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
 from langchain_community.tools.sql_database.prompt import QUERY_CHECKER
@@ -157,9 +159,9 @@ class CustomQuerySQLCheckerTool(BaseSQLDatabaseTool, BaseTool):
 
     @root_validator(pre=True)
     def initialize_llm_chain(cls, values: Dict[str, Any]) -> Dict[str, Any]:
-        if "llm_chain" not in values:
-            from langchain.chains.llm import LLMChain
+        """Initializes the LLM chain if it does not exist in the given values dictionary."""
+        if "llm_chain" not in values:
             values["llm_chain"] = LLMChain(
                 llm=values.get("llm"),  # type: ignore[arg-type]
                 prompt=PromptTemplate(template=QUERY_CHECKER, input_variables=["dialect", "query"]),
@@ -195,6 +197,7 @@ async def _arun(
 
 
 class CustomSQLDatabaseToolkit(SQLDatabaseToolkit):
+    """Provides functionality to manage and manipulate SQL databases in a customized way."""
 
     def get_tools(self) -> List[BaseTool]:
         """Get the tools in the toolkit."""
@@ -252,7 +255,7 @@ def custom_create_sql_agent(
     prompt: Optional[BasePromptTemplate] = None,
     **kwargs: Any,
 ) -> AgentExecutor:
-    """"""
+    """Creates a SQL agent with specified parameters."""
     tools = toolkit.get_tools()
 
     if prompt is None:
@@ -273,8 +276,6 @@ def custom_create_sql_agent(
     tools = [tool for tool in tools if not isinstance(tool, ListSQLDatabaseTool)]
 
     if prompt is None:
-        from langchain.agents.mrkl import prompt as react_prompt
-
         format_instructions = format_instructions or react_prompt.FORMAT_INSTRUCTIONS
         template = "\n\n".join(
             [
@@ -330,7 +331,7 @@ def execute(input, url):
     result = agent_executor.invoke(input)
 
     query = []
-    for log, output in result["intermediate_steps"]:
+    for log, _ in result["intermediate_steps"]:
         if log.tool == "sql_db_query":
             query.append(log.tool_input)
     result["sql"] = query[0].replace("Observation", "")
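
For a quick end-to-end check of the refactored `execute` helper outside the FastAPI wrapper, a minimal driver along the lines below can be used. This is an illustrative sketch, not part of the patch: it assumes the Chinook PostgresDB and TGI containers from the README are already running, that `TGI_LLM_ENDPOINT` points at the TGI service, and that `execute` accepts a standard SQLAlchemy-style PostgreSQL URL (the diff only shows its `(input, url)` signature and that the result carries the generated query under `"sql"`).

```python
# Illustrative driver for comps/texttosql/langchain/texttosql.py.
# Assumption: run from that directory with the README's PostgresDB (port 5442)
# and TGI endpoint already up; credentials mirror the docker compose example.
import os

# The agent talks to the TGI endpoint; set it before importing the module.
os.environ.setdefault("TGI_LLM_ENDPOINT", "http://localhost:8008")

from texttosql import execute  # noqa: E402  (import after env setup on purpose)

# Assumed SQLAlchemy-style connection URL; adjust host/credentials as needed.
url = "postgresql://postgres:testpwd@localhost:5442/chinook"

# The first argument mirrors the `input_text` field used by the /v1/texttosql endpoint.
result = execute("Find the total number of Albums.", url)

# execute() extracts the generated query from the agent's intermediate steps
# and stores it under "sql" (see the last hunk of texttosql.py above).
print(result["sql"])
```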