diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ff4ff47d789..713f68b205b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -70,7 +70,7 @@ jobs: echo "RAGFLOW_IMAGE=infiniflow/ragflow:dev" >> docker/.env sudo docker compose -f docker/docker-compose.yml up -d - - name: Run tests against Elasticsearch + - name: Run sdk tests against Elasticsearch run: | export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY="" export HOST_ADDRESS=http://host.docker.internal:9380 @@ -78,7 +78,18 @@ jobs: echo "Waiting for service to be available..." sleep 5 done - cd sdk/python && poetry install && source .venv/bin/activate && cd test && pytest --tb=short t_dataset.py t_chat.py t_session.py t_document.py t_chunk.py + cd sdk/python && poetry install && source .venv/bin/activate && cd test/test_sdk_api && pytest -s --tb=short get_email.py t_dataset.py t_chat.py t_session.py t_document.py t_chunk.py + + - name: Run frontend api tests against Elasticsearch + run: | + export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY="" + export HOST_ADDRESS=http://host.docker.internal:9380 + until sudo docker exec ragflow-server curl -s --connect-timeout 5 ${HOST_ADDRESS} > /dev/null; do + echo "Waiting for service to be available..." 
+ sleep 5 + done + cd sdk/python && poetry install && source .venv/bin/activate && cd test/test_frontend_api && pytest -s --tb=short get_email.py test_dataset.py + - name: Stop ragflow:dev if: always() # always run this step even if previous steps failed @@ -89,7 +100,17 @@ jobs: run: | sudo DOC_ENGINE=infinity docker compose -f docker/docker-compose.yml up -d - - name: Run tests against Infinity + - name: Run sdk tests against Infinity + run: | + export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY="" + export HOST_ADDRESS=http://host.docker.internal:9380 + until sudo docker exec ragflow-server curl -s --connect-timeout 5 ${HOST_ADDRESS} > /dev/null; do + echo "Waiting for service to be available..." + sleep 5 + done + cd sdk/python && poetry install && source .venv/bin/activate && cd test/test_sdk_api && pytest -s --tb=short get_email.py t_dataset.py t_chat.py t_session.py t_document.py t_chunk.py + + - name: Run frontend api tests against Infinity run: | export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY="" export HOST_ADDRESS=http://host.docker.internal:9380 @@ -97,7 +118,7 @@ jobs: echo "Waiting for service to be available..." 
sleep 5 done - cd sdk/python && poetry install && source .venv/bin/activate && cd test && pytest --tb=short t_dataset.py t_chat.py t_session.py t_document.py t_chunk.py + cd sdk/python && poetry install && source .venv/bin/activate && cd test/test_frontend_api && pytest -s --tb=short get_email.py test_dataset.py - name: Stop ragflow:dev if: always() # always run this step even if previous steps failed diff --git a/agent/component/__init__.py b/agent/component/__init__.py index a977f03f441..d8e31fb86ce 100644 --- a/agent/component/__init__.py +++ b/agent/component/__init__.py @@ -30,6 +30,7 @@ from .akshare import AkShare, AkShareParam from .crawler import Crawler, CrawlerParam from .invoke import Invoke, InvokeParam +from .template import Template, TemplateParam def component_class(class_name): diff --git a/agent/component/base.py b/agent/component/base.py index eab52bac52d..1aa9629e59f 100644 --- a/agent/component/base.py +++ b/agent/component/base.py @@ -385,10 +385,14 @@ def __str__(self): """ return """{{ "component_name": "{}", - "params": {} + "params": {}, + "output": {}, + "inputs": {} }}""".format(self.component_name, - self._param - ) + self._param, + json.dumps(json.loads(str(self._param))["output"], ensure_ascii=False), + json.dumps(json.loads(str(self._param))["inputs"], ensure_ascii=False) + ) def __init__(self, canvas, id, param: ComponentParamBase): self._canvas = canvas diff --git a/agent/component/generate.py b/agent/component/generate.py index 53a0a45531e..c6640c88986 100644 --- a/agent/component/generate.py +++ b/agent/component/generate.py @@ -145,7 +145,7 @@ def _run(self, history, **kwargs): else: retrieval_res = pd.DataFrame([]) for n, v in kwargs.items(): - prompt = re.sub(r"\{%s\}" % re.escape(n), re.escape(str(v)), prompt) + prompt = re.sub(r"\{%s\}" % re.escape(n), str(v), prompt) if not self._param.inputs and prompt.find("{input}") >= 0: retrieval_res = self.get_input() diff --git a/agent/component/template.py 
b/agent/component/template.py new file mode 100644 index 00000000000..06f84114e2d --- /dev/null +++ b/agent/component/template.py @@ -0,0 +1,85 @@ +# +# Copyright 2024 The InfiniFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import re +from agent.component.base import ComponentBase, ComponentParamBase + + +class TemplateParam(ComponentParamBase): +    """ +    Define the Template component parameters. +    """ + +    def __init__(self): +        super().__init__() +        self.content = "" +        self.parameters = [] + +    def check(self): +        self.check_empty(self.content, "[Template] Content") +        return True + + +class Template(ComponentBase): +    component_name = "Template" + +    def get_dependent_components(self): +        cpnts = set([para["component_id"].split("@")[0] for para in self._param.parameters \ +                     if para.get("component_id") \ +                     and para["component_id"].lower().find("answer") < 0 \ +                     and para["component_id"].lower().find("begin") < 0]) +        return list(cpnts) + +    def _run(self, history, **kwargs): +        content = self._param.content + +        self._param.inputs = [] +        for para in self._param.parameters: +            if not para.get("component_id"): continue +            component_id = para["component_id"].split("@")[0] +            if para["component_id"].lower().find("@") >= 0: +                cpn_id, key = para["component_id"].split("@") +                for p in self._canvas.get_component(cpn_id)["obj"]._param.query: +                    if p["key"] == key: +                        kwargs[para["key"]] = p.get("value", "") +                        self._param.inputs.append( +                            {"component_id": 
para["component_id"], "content": kwargs[para["key"]]}) + break + else: + assert False, f"Can't find parameter '{key}' for {cpn_id}" + continue + + cpn = self._canvas.get_component(component_id)["obj"] + if cpn.component_name.lower() == "answer": + hist = self._canvas.get_history(1) + if hist: + hist = hist[0]["content"] + else: + hist = "" + kwargs[para["key"]] = hist + continue + + _, out = cpn.output(allow_partial=False) + if "content" not in out.columns: + kwargs[para["key"]] = "" + else: + kwargs[para["key"]] = " - "+"\n - ".join([o if isinstance(o, str) else str(o) for o in out["content"]]) + self._param.inputs.append({"component_id": para["component_id"], "content": kwargs[para["key"]]}) + + for n, v in kwargs.items(): + content = re.sub(r"\{%s\}" % re.escape(n), str(v), content) + + return Template.be_output(content) + diff --git a/api/apps/canvas_app.py b/api/apps/canvas_app.py index 01f8cc661b9..0a397bb0f55 100644 --- a/api/apps/canvas_app.py +++ b/api/apps/canvas_app.py @@ -120,7 +120,10 @@ def sse(): try: for ans in canvas.run(stream=True): if ans.get("running_status"): - yield "data:" + json.dumps({"code": 0, "message": "", "data": ans}, ensure_ascii=False) + "\n\n" + yield "data:" + json.dumps({"code": 0, "message": "", + "data": {"answer": ans["content"], + "running_status": True}}, + ensure_ascii=False) + "\n\n" continue for k in ans.keys(): final_ans[k] = ans[k] diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py index f98fe51a04d..523d13876c4 100644 --- a/api/apps/kb_app.py +++ b/api/apps/kb_app.py @@ -167,9 +167,7 @@ def rm(): if not KnowledgebaseService.delete_by_id(req["kb_id"]): return get_data_error_result( message="Database error (Knowledgebase removal)!") - tenants = UserTenantService.query(user_id=current_user.id) - for tenant in tenants: - settings.docStoreConn.deleteIdx(search.index_name(tenant.tenant_id), req["kb_id"]) + settings.docStoreConn.delete({"kb_id": req["kb_id"]}, search.index_name(kbs[0].tenant_id), req["kb_id"]) return 
get_json_result(data=True) except Exception as e: return server_error_response(e) diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index 80bd8d9cb2b..0dc97ff7a61 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -115,6 +115,7 @@ def upload(dataset_id, tenant_id): return get_result( message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR ) + ''' # total size total_size = 0 for file_obj in file_objs: @@ -127,6 +128,7 @@ def upload(dataset_id, tenant_id): message=f"Total file size exceeds 10MB limit! ({total_size / (1024 * 1024):.2f} MB)", code=settings.RetCode.ARGUMENT_ERROR, ) + ''' e, kb = KnowledgebaseService.get_by_id(dataset_id) if not e: raise LookupError(f"Can't find the dataset with ID {dataset_id}!") diff --git a/api/apps/user_app.py b/api/apps/user_app.py index 9d10986a964..7136fa73716 100644 --- a/api/apps/user_app.py +++ b/api/apps/user_app.py @@ -517,7 +517,8 @@ def user_register(user_id, user): "llm_name": llm.llm_name, "model_type": llm.model_type, "api_key": settings.API_KEY, - "api_base": settings.LLM_BASE_URL, + "api_base": settings.LLM_BASE_URL + #"max_tokens": llm.max_tokens if llm.max_tokens else 8192 } ) diff --git a/docker/.env b/docker/.env index b3af4bc3620..4316decdbb5 100644 --- a/docker/.env +++ b/docker/.env @@ -1,5 +1,7 @@ # The type of doc engine to use. -# Supported values are `elasticsearch`, `infinity`. +# Available options: +# - `elasticsearch` (default) +# - `infinity` (https://github.com/infiniflow/infinity) DOC_ENGINE=${DOC_ENGINE:-elasticsearch} # ------------------------------ @@ -20,7 +22,7 @@ ES_HOST=es01 ES_PORT=1200 # The password for Elasticsearch. -# When updated, you must revise the `es.password` entry in service_conf.yaml accordingly. +# When updated, you must revise the `es.password` entry in service_conf.yaml accordingly. 
ELASTIC_PASSWORD=infini_rag_flow # The port used to expose the Kibana service to the host machine, @@ -85,7 +87,7 @@ RAGFLOW_IMAGE=infiniflow/ragflow:dev-slim # RAGFLOW_IMAGE=infiniflow/ragflow:dev # # The Docker image of the dev edition includes: -# - Embedded embedding models: +# - Built-in embedding models: # - BAAI/bge-large-zh-v1.5 # - BAAI/bge-reranker-v2-m3 # - maidalun1020/bce-embedding-base_v1 @@ -123,3 +125,7 @@ TIMEZONE='Asia/Shanghai' # Optimizations for MacOS # Uncomment the following line if your OS is MacOS: # MACOS=1 + +# The maximum file size for each uploaded file, in bytes. +# You can uncomment this line and update the value if you wish to change 128M file size limit +# MAX_CONTENT_LENGTH=134217728 \ No newline at end of file diff --git a/docker/README.md b/docker/README.md index cbb63e38d85..1de2aef3930 100644 --- a/docker/README.md +++ b/docker/README.md @@ -77,7 +77,7 @@ The [.env](./.env) file contains important environment variables for Docker. - `infiniflow/ragflow:dev-slim` (default): The RAGFlow Docker image without embedding models. - `infiniflow/ragflow:dev`: The RAGFlow Docker image with embedding models including: - - Embedded embedding models: + - Built-in embedding models: - `BAAI/bge-large-zh-v1.5` - `BAAI/bge-reranker-v2-m3` - `maidalun1020/bce-embedding-base_v1` @@ -117,6 +117,11 @@ The [.env](./.env) file contains important environment variables for Docker. - `MACOS` Optimizations for MacOS. It is disabled by default. You can uncomment this line if your OS is MacOS. +### Maximum file size + +- `MAX_CONTENT_LENGTH` + The maximum file size for each uploaded file, in bytes. You can uncomment this line if you wish to change 128M file size limit. + ## 🐋 Service configuration [service_conf.yaml](./service_conf.yaml) specifies the system-level configuration for RAGFlow and is used by its API server and task executor. 
In a dockerized setup, this file is automatically created based on the [service_conf.yaml.template](./service_conf.yaml.template) file (replacing all environment variables by their values). diff --git a/docs/configurations.md b/docs/configurations.md index 6dc7619fad0..fd09476e8a4 100644 --- a/docs/configurations.md +++ b/docs/configurations.md @@ -64,7 +64,7 @@ The [.env](https://github.com/infiniflow/ragflow/blob/main/docker/.env) file con ### MySQL - `MYSQL_PASSWORD` - The password for MySQL. + The password for MySQL. - `MYSQL_PORT` The port used to expose the MySQL service to the host machine, allowing **external** access to the MySQL database running inside the Docker container. Defaults to `5455`. @@ -75,7 +75,7 @@ The [.env](https://github.com/infiniflow/ragflow/blob/main/docker/.env) file con - `MINIO_PORT` The port used to expose the MinIO API service to the host machine, allowing **external** access to the MinIO object storage service running inside the Docker container. Defaults to `9000`. - `MINIO_USER` - The username for MinIO. + The username for MinIO. - `MINIO_PASSWORD` The password for MinIO. accordingly. @@ -95,7 +95,7 @@ The [.env](https://github.com/infiniflow/ragflow/blob/main/docker/.env) file con - `infiniflow/ragflow:dev-slim` (default): The RAGFlow Docker image without embedding models. 
- `infiniflow/ragflow:dev`: The RAGFlow Docker image with embedding models including: - - Embedded embedding models: + - Built-in embedding models: - `BAAI/bge-large-zh-v1.5` - `BAAI/bge-reranker-v2-m3` - `maidalun1020/bce-embedding-base_v1` diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx index 31fe2fcd5e1..2774b1462a3 100644 --- a/docs/quickstart.mdx +++ b/docs/quickstart.mdx @@ -286,8 +286,8 @@ Once you have selected an embedding model and used it to parse a file, you are n _When the file parsing completes, its parsing status changes to **SUCCESS**._ :::caution NOTE -- If your file parsing gets stuck at below 1%, see [FAQ 4.3](https://ragflow.io/docs/dev/faq#43-why-does-my-document-parsing-stall-at-under-one-percent). -- If your file parsing gets stuck at near completion, see [FAQ 4.4](https://ragflow.io/docs/dev/faq#44-why-does-my-pdf-parsing-stall-near-completion-while-the-log-does-not-show-any-error) +- If your file parsing gets stuck at below 1%, see [this FAQ](https://ragflow.io/docs/dev/faq#why-does-my-document-parsing-stall-at-under-one-percent). +- If your file parsing gets stuck at near completion, see [this FAQ](https://ragflow.io/docs/dev/faq#why-does-my-pdf-parsing-stall-near-completion-while-the-log-does-not-show-any-error) ::: ## Intervene with file parsing diff --git a/docs/references/faq.md b/docs/references/faq.md index 3033b343f13..ee561302717 100644 --- a/docs/references/faq.md +++ b/docs/references/faq.md @@ -5,162 +5,161 @@ slug: /faq # Frequently asked questions -Queries regarding general usage, troubleshooting, features, performance, and more. +Queries regarding general features, troubleshooting, performance, and more. -## General +--- + +## General features -### 1. What sets RAGFlow apart from other RAG products? +--- + +### What sets RAGFlow apart from other RAG products? The "garbage in garbage out" status quo remains unchanged despite the fact that LLMs have advanced Natural Language Processing (NLP) significantly. 
In response, RAGFlow introduces two unique features compared to other Retrieval-Augmented Generation (RAG) products. - Fine-grained document parsing: Document parsing involves images and tables, with the flexibility for you to intervene as needed. - Traceable answers with reduced hallucinations: You can trust RAGFlow's responses as you can view the citations and references supporting them. -### 2. Which languages does RAGFlow support? +--- -English, simplified Chinese, traditional Chinese for now. +### Why does it take longer for RAGFlow to parse a document than LangChain? -### 3. Which embedding models can be deployed locally? +We put painstaking effort into document pre-processing tasks like layout analysis, table structure recognition, and OCR (Optical Character Recognition) using our vision models. This contributes to the additional time required. -- BAAI/bge-large-zh-v1.5 -- BAAI/bge-base-en-v1.5 -- BAAI/bge-large-en-v1.5 -- BAAI/bge-small-en-v1.5 -- BAAI/bge-small-zh-v1.5 -- jinaai/jina-embeddings-v2-base-en -- jinaai/jina-embeddings-v2-small-en -- nomic-ai/nomic-embed-text-v1.5 -- sentence-transformers/all-MiniLM-L6-v2 -- maidalun1020/bce-embedding-base_v1 +--- -## Performance +### Why does RAGFlow require more resources than other projects? -### 1. Why does it take longer for RAGFlow to parse a document than LangChain? +RAGFlow has a number of built-in models for document structure parsing, which account for the additional computational resources. -We put painstaking effort into document pre-processing tasks like layout analysis, table structure recognition, and OCR (Optical Character Recognition) using our vision model. This contributes to the additional time required. +--- -### 2. Why does RAGFlow require more resources than other projects? +### Which architectures or devices does RAGFlow support? -RAGFlow has a number of built-in models for document structure parsing, which account for the additional computational resources. 
+We officially support x86 CPU and nvidia GPU. While we also test RAGFlow on ARM64 platforms, we do not plan to maintain RAGFlow Docker images for ARM. -## Feature +--- -### 1. Which architectures or devices does RAGFlow support? +### Which embedding models can be deployed locally? + +RAGFlow offers two Docker image editions, `dev-slim` and `dev`: + +- `infiniflow/ragflow:dev-slim` (default): The RAGFlow Docker image without embedding models. +- `infiniflow/ragflow:dev`: The RAGFlow Docker image with embedding models including: + - Built-in embedding models: + - `BAAI/bge-large-zh-v1.5` + - `BAAI/bge-reranker-v2-m3` + - `maidalun1020/bce-embedding-base_v1` + - `maidalun1020/bce-reranker-base_v1` + - Embedding models that will be downloaded once you select them in the RAGFlow UI: + - `BAAI/bge-base-en-v1.5` + - `BAAI/bge-large-en-v1.5` + - `BAAI/bge-small-en-v1.5` + - `BAAI/bge-small-zh-v1.5` + - `jinaai/jina-embeddings-v2-base-en` + - `jinaai/jina-embeddings-v2-small-en` + - `nomic-ai/nomic-embed-text-v1.5` + - `sentence-transformers/all-MiniLM-L6-v2` -Currently, we only support x86 CPU and Nvidia GPU. +--- -### 2. Do you offer an API for integration with third-party applications? +### Do you offer an API for integration with third-party applications? The corresponding APIs are now available. See the [RAGFlow HTTP API Reference](./http_api_reference.md) or the [RAGFlow Python API Reference](./python_api_reference.md) for more information. -### 3. Do you support stream output? +--- + +### Do you support stream output? -This feature is supported. +Yes, we do. + +--- -### 4. Is it possible to share dialogue through URL? +### Is it possible to share dialogue through URL? No, this feature is not supported. -### 5. Do you support multiple rounds of dialogues, i.e., referencing previous dialogues as context for the current dialogue? +--- + +### Do you support multiple rounds of dialogues, i.e., referencing previous dialogues as context for the current dialogue? 
This feature and the related APIs are still in development. Contributions are welcome. +--- ## Troubleshooting -### 1. Issues with docker images +--- -#### 1.1 How to build the RAGFlow image from scratch? +### Issues with Docker images -``` -$ git clone https://github.com/infiniflow/ragflow.git -$ cd ragflow -$ docker build -t infiniflow/ragflow:latest . -$ cd ragflow/docker -$ docker compose up -d -``` +--- -#### 1.2 `process "/bin/sh -c cd ./web && npm i && npm run build"` failed +#### How to build the RAGFlow image from scratch? -1. Check your network from within Docker, for example: -```bash -curl https://hf-mirror.com -``` +See [Build a RAGFlow Docker image](https://ragflow.io/docs/dev/build_docker_image). -2. If your network works fine, the issue lies with the Docker network configuration. Replace the Docker building command: -```bash -docker build -t infiniflow/ragflow:vX.Y.Z. -``` - With this: -```bash -docker build -t infiniflow/ragflow:vX.Y.Z. --network host -``` +--- + +### Issues with huggingface models -### 2. Issues with huggingface models +--- -#### 2.1 Cannot access https://huggingface.co - -A *locally* deployed RAGflow downloads OCR and embedding modules from [Huggingface website](https://huggingface.co) by default. If your machine is unable to access this site, the following error occurs and PDF parsing fails: +#### Cannot access https://huggingface.co + +A locally deployed RAGflow downloads OCR and embedding modules from [Huggingface website](https://huggingface.co) by default. If your machine is unable to access this site, the following error occurs and PDF parsing fails: ``` FileNotFoundError: [Errno 2] No such file or directory: '/root/.cache/huggingface/hub/models--InfiniFlow--deepdoc/snapshots/be0c1e50eef6047b412d1800aa89aba4d275f997/ocr.res' ``` - To fix this issue, use https://hf-mirror.com instead: - 1. 
Stop all containers and remove all related resources: +To fix this issue, use https://hf-mirror.com instead: - ```bash - cd ragflow/docker/ - docker compose down - ``` +1. Stop all containers and remove all related resources: - 2. Replace `https://huggingface.co` with `https://hf-mirror.com` in **ragflow/docker/docker-compose.yml**. - - 3. Start up the server: + ```bash + cd ragflow/docker/ + docker compose down + ``` - ```bash - docker compose up -d - ``` +2. Uncomment the following line in **ragflow/docker/.env**: -#### 2.2. `MaxRetryError: HTTPSConnectionPool(host='hf-mirror.com', port=443)` + ``` + # HF_ENDPOINT=https://hf-mirror.com + ``` -This error suggests that you do not have Internet access or are unable to connect to hf-mirror.com. Try the following: +3. Start up the server: -1. Manually download the resource files from [huggingface.co/InfiniFlow/deepdoc](https://huggingface.co/InfiniFlow/deepdoc) to your local folder **~/deepdoc**. + ```bash + docker compose up -d + ``` + +--- + +#### `MaxRetryError: HTTPSConnectionPool(host='hf-mirror.com', port=443)` + +This error suggests that you do not have Internet access or are unable to connect to hf-mirror.com. Try the following: + +1. Manually download the resource files from [huggingface.co/InfiniFlow/deepdoc](https://huggingface.co/InfiniFlow/deepdoc) to your local folder **~/deepdoc**. 2. Add a volumes to **docker-compose.yml**, for example: -``` -- ~/deepdoc:/ragflow/rag/res/deepdoc -``` -#### 2.3 `FileNotFoundError: [Errno 2] No such file or directory: '/root/.cache/huggingface/hub/models--InfiniFlow--deepdoc/snapshots/FileNotFoundError: [Errno 2] No such file or directory: '/ragflow/rag/res/deepdoc/ocr.res'be0c1e50eef6047b412d1800aa89aba4d275f997/ocr.res'` + ``` + - ~/deepdoc:/ragflow/rag/res/deepdoc + ``` -1. Check your network from within Docker, for example: -```bash -curl https://hf-mirror.com -``` -2. Run `ifconfig` to check the `mtu` value. 
If the server's `mtu` is `1450` while the NIC's `mtu` in the container is `1500`, this mismatch may cause network instability. Adjust the `mtu` policy as follows: +--- -``` -vim docker-compose-base.yml -# Original configuration: -networks: - ragflow: - driver: bridge -# Modified configuration: -networks: - ragflow: - driver: bridge - driver_opts: - com.docker.network.driver.mtu: 1450 -``` +### Issues with RAGFlow servers -### 3. Issues with RAGFlow servers +--- -#### 3.1 `WARNING: can't find /raglof/rag/res/borker.tm` +#### `WARNING: can't find /raglof/rag/res/borker.tm` Ignore this warning and continue. All system warnings can be ignored. -#### 3.2 `network anomaly There is an abnormality in your network and you cannot connect to the server.` +--- + +#### `network anomaly There is an abnormality in your network and you cannot connect to the server.` ![anomaly](https://github.com/infiniflow/ragflow/assets/93570324/beb7ad10-92e4-4a58-8886-bfb7cbd09e5d) @@ -181,64 +180,79 @@ You will not log in to RAGFlow unless the server is fully initialized. Run `dock INFO:werkzeug:Press CTRL+C to quit ``` +--- -### 4. Issues with RAGFlow backend services - -#### 4.1 `dependency failed to start: container ragflow-mysql is unhealthy` +### Issues with RAGFlow backend services -`dependency failed to start: container ragflow-mysql is unhealthy` means that your MySQL container failed to start. Try replacing `mysql:5.7.18` with `mariadb:10.5.8` in **docker-compose-base.yml**. +--- -#### 4.2 `Realtime synonym is disabled, since no redis connection` +#### `Realtime synonym is disabled, since no redis connection` Ignore this warning and continue. All system warnings can be ignored. ![](https://github.com/infiniflow/ragflow/assets/93570324/ef5a6194-084a-4fe3-bdd5-1c025b40865c) -#### 4.3 Why does my document parsing stall at under one percent? +--- + +#### Why does my document parsing stall at under one percent? 
![stall](https://github.com/infiniflow/ragflow/assets/93570324/3589cc25-c733-47d5-bbfc-fedb74a3da50) -Click the red cross beside the 'parsing status' bar, then restart the parsing process to see if the issue remains. If the issue persists and your RAGFlow is deployed locally, try the following: +Click the red cross beside the 'parsing status' bar, then restart the parsing process to see if the issue remains. If the issue persists and your RAGFlow is deployed locally, try the following: 1. Check the log of your RAGFlow server to see if it is running properly: -```bash -docker logs -f ragflow-server -``` + + ```bash + docker logs -f ragflow-server + ``` + 2. Check if the **task_executor.py** process exists. 3. Check if your RAGFlow server can access hf-mirror.com or huggingface.com. -#### 4.4 Why does my pdf parsing stall near completion, while the log does not show any error? +--- + +#### Why does my pdf parsing stall near completion, while the log does not show any error? Click the red cross beside the 'parsing status' bar, then restart the parsing process to see if the issue remains. If the issue persists and your RAGFlow is deployed locally, the parsing process is likely killed due to insufficient RAM. Try increasing your memory allocation by increasing the `MEM_LIMIT` value in **docker/.env**. :::note Ensure that you restart up your RAGFlow server for your changes to take effect! + ```bash docker compose stop ``` + ```bash docker compose up -d ``` + ::: ![nearcompletion](https://github.com/infiniflow/ragflow/assets/93570324/563974c3-f8bb-4ec8-b241-adcda8929cbb) -#### 4.5 `Index failure` +--- + +#### `Index failure` An index failure usually indicates an unavailable Elasticsearch service. -#### 4.6 How to check the log of RAGFlow? +--- + +#### How to check the log of RAGFlow? ```bash -tail -f path_to_ragflow/docker/ragflow-logs/rag/*.log +tail -f ragflow/docker/ragflow-logs/*.log ``` -#### 4.7 How to check the status of each component in RAGFlow? 
+--- + +#### How to check the status of each component in RAGFlow? ```bash $ docker ps ``` -*The system displays the following if all your RAGFlow components are running properly:* + +*The system displays the following if all your RAGFlow components are running properly:* ``` 5bc45806b680 infiniflow/ragflow:latest "./entrypoint.sh" 11 hours ago Up 11 hours 0.0.0.0:80->80/tcp, :::80->80/tcp, 0.0.0.0:443->443/tcp, :::443->443/tcp, 0.0.0.0:9380->9380/tcp, :::9380->9380/tcp ragflow-server @@ -247,21 +261,24 @@ d8c86f06c56b mysql:5.7.18 "docker-entrypoint.s…" 7 days ago Up cd29bcb254bc quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z "/usr/bin/docker-ent…" 2 weeks ago Up 11 hours 0.0.0.0:9001->9001/tcp, :::9001->9001/tcp, 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp ragflow-minio ``` -#### 4.8 `Exception: Can't connect to ES cluster` +--- + +#### `Exception: Can't connect to ES cluster` 1. Check the status of your Elasticsearch component: -```bash -$ docker ps -``` + ```bash + $ docker ps + ``` + *The status of a 'healthy' Elasticsearch component in your RAGFlow should look as follows:* -``` -91220e3285dd docker.elastic.co/elasticsearch/elasticsearch:8.11.3 "/bin/tini -- /usr/l…" 11 hours ago Up 11 hours (healthy) 9300/tcp, 0.0.0.0:9200->9200/tcp, :::9200->9200/tcp ragflow-es-01 -``` + + ``` + 91220e3285dd docker.elastic.co/elasticsearch/elasticsearch:8.11.3 "/bin/tini -- /usr/l…" 11 hours ago Up 11 hours (healthy) 9300/tcp, 0.0.0.0:9200->9200/tcp, :::9200->9200/tcp ragflow-es-01 + ``` 2. If your container keeps restarting, ensure `vm.max_map_count` >= 262144 as per [this README](https://github.com/infiniflow/ragflow?tab=readme-ov-file#-start-up-the-server). Updating the `vm.max_map_count` value in **/etc/sysctl.conf** is required, if you wish to keep your change permanent. This configuration works only for Linux. - 3. If your issue persists, ensure that the ES host setting is correct: - If you are running RAGFlow with Docker, it is in **docker/service_conf.yml**. 
Set it as follows: @@ -269,135 +286,127 @@ $ docker ps es: hosts: 'http://es01:9200' ``` - - If you run RAGFlow outside of Docker, verify the ES host setting in **conf/service_conf.yml** using: + - If you run RAGFlow outside of Docker, verify the ES host setting in **conf/service_conf.yml** using: ```bash curl http://: ``` -#### 4.9 Can't start ES container and get `Elasticsearch did not exit normally` +--- + +#### Can't start ES container and get `Elasticsearch did not exit normally` -This is because you forgot to update the `vm.max_map_count` value in **/etc/sysctl.conf** and your change to this value was reset after a system reboot. +This is because you forgot to update the `vm.max_map_count` value in **/etc/sysctl.conf** and your change to this value was reset after a system reboot. -#### 4.10 `{"data":null,"code":100,"message":""}` +--- + +#### `{"data":null,"code":100,"message":""}` Your IP address or port number may be incorrect. If you are using the default configurations, enter `http://` (**NOT 9380, AND NO PORT NUMBER REQUIRED!**) in your browser. This should work. -#### 4.11 `Ollama - Mistral instance running at 127.0.0.1:11434 but cannot add Ollama as model in RagFlow` +--- + +#### `Ollama - Mistral instance running at 127.0.0.1:11434 but cannot add Ollama as model in RagFlow` A correct Ollama IP address and port is crucial to adding models to Ollama: -- If you are on demo.ragflow.io, ensure that the server hosting Ollama has a publicly accessible IP address.Note that 127.0.0.1 is not a publicly accessible IP address. +- If you are on demo.ragflow.io, ensure that the server hosting Ollama has a publicly accessible IP address. Note that 127.0.0.1 is not a publicly accessible IP address. - If you deploy RAGFlow locally, ensure that Ollama and RAGFlow are in the same LAN and can comunicate with each other. -#### 4.12 Do you offer examples of using deepdoc to parse PDF or other files? 
+See [Deploy a local LLM](../guides/deploy_local_llm.mdx) for more information. -Yes, we do. See the Python files under the **rag/app** folder. +--- -#### 4.13 Why did I fail to upload a 10MB+ file to my locally deployed RAGFlow? +#### Do you offer examples of using deepdoc to parse PDF or other files? -You probably forgot to update the **MAX_CONTENT_LENGTH** environment variable: +Yes, we do. See the Python files under the **rag/app** folder. + +--- + +#### Why did I fail to upload a 128MB+ file to my locally deployed RAGFlow? + +Ensure that you update the **MAX_CONTENT_LENGTH** environment variable: + +1. In **ragflow/docker/.env**, uncomment environment variable `MAX_CONTENT_LENGTH`: + + ``` + MAX_CONTENT_LENGTH=128000000 + ``` -1. Add environment variable `MAX_CONTENT_LENGTH` to **ragflow/docker/.env**: -``` -MAX_CONTENT_LENGTH=100000000 -``` 2. Update **docker-compose.yml**: -``` -environment: - - MAX_CONTENT_LENGTH=${MAX_CONTENT_LENGTH} -``` + + ``` + environment: + - MAX_CONTENT_LENGTH=${MAX_CONTENT_LENGTH} + ``` + 3. Restart the RAGFlow server: -``` -docker compose up ragflow -d -``` - *Now you should be able to upload files of sizes less than 100MB.* - -#### 4.14 `Table 'rag_flow.document' doesn't exist` - -This exception occurs when starting up the RAGFlow server. Try the following: - - 1. Prolong the sleep time: Go to **docker/entrypoint.sh**, locate line 26, and replace `sleep 60` with `sleep 280`. - 2. If using Windows, ensure that the **entrypoint.sh** has LF end-lines. - 3. Go to **docker/docker-compose.yml**, add the following: - ``` - ./entrypoint.sh:/ragflow/entrypoint.sh - ``` - 4. Change directory: - ```bash - cd docker - ``` - 5. Stop the RAGFlow server: - ```bash - docker compose stop - ``` - 6. Restart up the RAGFlow server: - ```bash - docker compose up - ``` - -#### 4.15 `hint : 102 Fail to access model Connection error` - -![hint102](https://github.com/infiniflow/ragflow/assets/93570324/6633d892-b4f8-49b5-9a0a-37a0a8fba3d2) - -1. 
Ensure that the RAGFlow server can access the base URL. -2. Do not forget to append `/v1/` to `http://IP:port`: - `http://IP:port/v1/` - -#### 4.16 `FileNotFoundError: [Errno 2] No such file or directory` - -1. Check if the status of your minio container is healthy: + + ``` + docker compose up ragflow -d + ``` + +--- + +#### `FileNotFoundError: [Errno 2] No such file or directory` + +1. Check if the status of your MinIO container is healthy: + ```bash docker ps ``` + 2. Ensure that the username and password settings of MySQL and MinIO in **docker/.env** are in line with those in **docker/service_conf.yml**. +--- + ## Usage -### 1. How to increase the length of RAGFlow responses? +--- + +### How to increase the length of RAGFlow responses? 1. Right click the desired dialog to display the **Chat Configuration** window. 2. Switch to the **Model Setting** tab and adjust the **Max Tokens** slider to get the desired length. 3. Click **OK** to confirm your change. +--- -### 2. What does Empty response mean? How to set it? - -You limit what the system responds to what you specify in **Empty response** if nothing is retrieved from your knowledge base. If you do not specify anything in **Empty response**, you let your LLM improvise, giving it a chance to hallucinate. - -### 3. Can I set the base URL for OpenAI somewhere? - -![](https://github.com/infiniflow/ragflow/assets/93570324/8cfb6fa4-8a97-415d-b9fa-b6f405a055f3) +### How to run RAGFlow with a locally deployed LLM? -### 4. How to run RAGFlow with a locally deployed LLM? +You can use Ollama or Xinference to deploy local LLM. See [here](../guides/deploy_local_llm.mdx) for more information. -You can use Ollama to deploy local LLM. See [here](../guides/deploy_local_llm.mdx) for more information. +--- -### 5. How to link up ragflow and ollama servers? +### How to interconnect RAGFlow with Ollama? -- If RAGFlow is locally deployed, ensure that your RAGFlow and Ollama are in the same LAN. 
+- If RAGFlow is locally deployed, ensure that your RAGFlow and Ollama are in the same LAN. - If you are using our online demo, ensure that the IP address of your Ollama server is public and accessible. -### 6. How to configure RAGFlow to respond with 100% matched results, rather than utilizing LLM? - -1. Click **Knowledge Base** in the middle top of the page. -2. Right click the desired knowledge base to display the **Configuration** dialogue. -3. Choose **Q&A** as the chunk method and click **Save** to confirm your change. - -### 7. Do I need to connect to Redis? +See [here](../guides/deploy_local_llm.mdx) for more information. -No, connecting to Redis is not required. +--- -### 8. `Error: Range of input length should be [1, 30000]` +### `Error: Range of input length should be [1, 30000]` -This error occurs because there are too many chunks matching your search criteria. Try reducing the **TopN** and increasing **Similarity threshold** to fix this issue: +This error occurs because there are too many chunks matching your search criteria. Try reducing the **TopN** and increasing **Similarity threshold** to fix this issue: -1. Click **Chat** in the middle top of the page. +1. Click **Chat** in the middle top of the page. 2. Right click the desired conversation > **Edit** > **Prompt Engine** 3. Reduce the **TopN** and/or raise **Silimarity threshold**. 4. Click **OK** to confirm your changes. ![topn](https://github.com/infiniflow/ragflow/assets/93570324/7ec72ab3-0dd2-4cff-af44-e2663b67b2fc) -### 9. How to upgrade RAGFlow? +--- + +### How to get an API key for integration with third-party applications? + +See [Acquire a RAGFlow API key](../guides/develop/acquire_ragflow_api_key.md). + +--- + +### How to upgrade RAGFlow? See [Upgrade RAGFlow](../guides/upgrade_ragflow.mdx) for more information. 
+ +--- diff --git a/docs/references/http_api_reference.md b/docs/references/http_api_reference.md index 9a53c040a3e..8ed052bfd21 100644 --- a/docs/references/http_api_reference.md +++ b/docs/references/http_api_reference.md @@ -2028,8 +2028,8 @@ curl --request POST \ The question to start an AI-powered conversation. - `"stream"`: (*Body Parameter*), `boolean` Indicates whether to output responses in a streaming way: - - `true`: Enable streaming. - - `false`: Disable streaming (default). + - `true`: Enable streaming (default). + - `false`: Disable streaming. - `"session_id"`: (*Body Parameter*) The ID of session. If it is not provided, a new session will be generated. @@ -2239,8 +2239,8 @@ curl --request POST \ The question to start an AI-powered conversation. - `"stream"`: (*Body Parameter*), `boolean` Indicates whether to output responses in a streaming way: - - `true`: Enable streaming. - - `false`: Disable streaming (default). + - `true`: Enable streaming (default). + - `false`: Disable streaming. - `"session_id"`: (*Body Parameter*) The ID of the session. If it is not provided, a new session will be generated. diff --git a/docs/references/python_api_reference.md b/docs/references/python_api_reference.md index ff3e1a1c444..fe272a42e03 100644 --- a/docs/references/python_api_reference.md +++ b/docs/references/python_api_reference.md @@ -1332,8 +1332,8 @@ The question to start an AI-powered conversation. Indicates whether to output responses in a streaming way: -- `True`: Enable streaming. -- `False`: Disable streaming (default). +- `True`: Enable streaming (default). +- `False`: Disable streaming. ### Returns @@ -1450,8 +1450,8 @@ The question to start an AI-powered conversation. Indicates whether to output responses in a streaming way: -- `True`: Enable streaming. -- `False`: Disable streaming (default). +- `True`: Enable streaming (default). +- `False`: Disable streaming. 
### Returns diff --git a/example/http/simple_example.sh b/example/http/dataset_example.sh similarity index 100% rename from example/http/simple_example.sh rename to example/http/dataset_example.sh diff --git a/example/sdk/simple_example.py b/example/sdk/dataset_example.py similarity index 100% rename from example/sdk/simple_example.py rename to example/sdk/dataset_example.py diff --git a/rag/utils/es_conn.py b/rag/utils/es_conn.py index c7c66513ce7..651c7a6b089 100644 --- a/rag/utils/es_conn.py +++ b/rag/utils/es_conn.py @@ -16,13 +16,15 @@ FusionExpr from rag.nlp import is_english, rag_tokenizer +ATTEMPT_TIME = 2 + @singleton class ESConnection(DocStoreConnection): def __init__(self): self.info = {} logging.info(f"Use Elasticsearch {settings.ES['hosts']} as the doc engine.") - for _ in range(24): + for _ in range(ATTEMPT_TIME): try: self.es = Elasticsearch( settings.ES["hosts"].split(","), @@ -92,7 +94,7 @@ def deleteIdx(self, indexName: str, knowledgebaseId: str): def indexExist(self, indexName: str, knowledgebaseId: str) -> bool: s = Index(indexName, self.es) - for i in range(3): + for i in range(ATTEMPT_TIME): try: return s.exists() except Exception as e: @@ -144,9 +146,9 @@ def search(self, selectFields: list[str], highlightFields: list[str], condition: if "minimum_should_match" in m.extra_options: minimum_should_match = str(int(m.extra_options["minimum_should_match"] * 100)) + "%" bqry.must.append(Q("query_string", fields=m.fields, - type="best_fields", query=m.matching_text, - minimum_should_match=minimum_should_match, - boost=1)) + type="best_fields", query=m.matching_text, + minimum_should_match=minimum_should_match, + boost=1)) bqry.boost = 1.0 - vector_similarity_weight elif isinstance(m, MatchDenseExpr): @@ -180,7 +182,7 @@ def search(self, selectFields: list[str], highlightFields: list[str], condition: q = s.to_dict() logging.debug(f"ESConnection.search {str(indexNames)} query: " + json.dumps(q)) - for i in range(3): + for i in range(ATTEMPT_TIME): 
try: res = self.es.search(index=indexNames, body=q, @@ -201,7 +203,7 @@ def search(self, selectFields: list[str], highlightFields: list[str], condition: raise Exception("ESConnection.search timeout.") def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | None: - for i in range(3): + for i in range(ATTEMPT_TIME): try: res = self.es.get(index=(indexName), id=chunkId, source=True, ) @@ -233,10 +235,10 @@ def insert(self, documents: list[dict], indexName: str, knowledgebaseId: str) -> operations.append(d_copy) res = [] - for _ in range(100): + for _ in range(ATTEMPT_TIME): try: r = self.es.bulk(index=(indexName), operations=operations, - refresh=False, timeout="600s") + refresh=False, timeout="60s") if re.search(r"False", str(r["errors"]), re.IGNORECASE): return res @@ -247,7 +249,9 @@ def insert(self, documents: list[dict], indexName: str, knowledgebaseId: str) -> return res except Exception as e: logging.warning("ESConnection.insert got exception: " + str(e)) + res = [] if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE): + res.append(str(e)) time.sleep(3) continue return res @@ -258,7 +262,7 @@ def update(self, condition: dict, newValue: dict, indexName: str, knowledgebaseI if "id" in condition and isinstance(condition["id"], str): # update specific single document chunkId = condition["id"] - for i in range(3): + for i in range(ATTEMPT_TIME): try: self.es.update(index=indexName, id=chunkId, doc=doc) return True @@ -326,7 +330,7 @@ def delete(self, condition: dict, indexName: str, knowledgebaseId: str) -> int: else: raise Exception("Condition value must be int, str or list.") logging.debug("ESConnection.delete query: " + json.dumps(qry.to_dict())) - for _ in range(10): + for _ in range(ATTEMPT_TIME): try: res = self.es.delete_by_query( index=indexName, @@ -437,7 +441,7 @@ def sql(self, sql: str, fetch_size: int, format: str): sql = sql.replace(p, r, 1) logging.debug(f"ESConnection.sql to es: {sql}") - for i in range(3): + for i in 
range(ATTEMPT_TIME): try: res = self.es.sql.query(body={"query": sql, "fetch_size": fetch_size}, format=format, request_timeout="2s") diff --git a/sdk/python/ragflow_sdk/modules/agent.py b/sdk/python/ragflow_sdk/modules/agent.py index 682a4692cb5..a6b52e2c222 100644 --- a/sdk/python/ragflow_sdk/modules/agent.py +++ b/sdk/python/ragflow_sdk/modules/agent.py @@ -51,7 +51,7 @@ def __init__(self,rag,res_dict): @staticmethod def create_session(id,rag) -> Session: - res = requests.post(f"http://127.0.0.1:9380/api/v1/agents/{id}/sessions",headers={"Authorization": f"Bearer {rag.user_key}"},json={}) + res = requests.post(f"{rag.api_url}/agents/{id}/sessions",headers={"Authorization": f"Bearer {rag.user_key}"},json={}) res = res.json() if res.get("code") == 0: return Session(rag,res.get("data")) diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py index bcad88f14c1..444d5c9bf22 100644 --- a/sdk/python/test/conftest.py +++ b/sdk/python/test/conftest.py @@ -16,9 +16,6 @@ def generate_random_email(): fN33jCHRoDUW81IH9zjij/vaw8IbVyb6vuwg6MX6inOEBRRzVbRYxXOu1wkWY6SsI8X70oF9aeLFp/PzQpjoe/YbSqpTq8qqrmHzn9vO+yvyYyvmDsphXe X8f7fp9c7vUsfOCkM+gHY3PadG+QHa7KI7mzTKgUTZImK6BZtfRBATDTthEUbbaTewY4H0MnWiCeeDhcbeQao6cFy1To8pE3RpmxnGnS8BsBn8w==''' -def get_email(): - return EMAIL - def register(): url = HOST_ADDRESS + "/v1/user/register" name = "user" @@ -50,3 +47,12 @@ def get_api_key_fixture(): raise Exception(res.get("message")) return res["data"].get("token") +@pytest.fixture(scope="session") +def get_auth(): + register() + auth = login() + return auth + +@pytest.fixture(scope="session") +def get_email(): + return EMAIL diff --git a/sdk/python/test/t_document.py b/sdk/python/test/t_document.py deleted file mode 100644 index 1624ef03fdd..00000000000 --- a/sdk/python/test/t_document.py +++ /dev/null @@ -1,62 +0,0 @@ -from ragflow_sdk import RAGFlow, DataSet, Document, Chunk -from common import HOST_ADDRESS - - -def test_upload_document_with_success(get_api_key_fixture): - 
API_KEY = get_api_key_fixture - rag = RAGFlow(API_KEY, HOST_ADDRESS) - ds = rag.create_dataset(name="test_upload_document") - blob = b"Sample document content for test." - with open("ragflow.txt","rb") as file: - blob_2=file.read() - document_infos = [] - document_infos.append({"displayed_name": "test_1.txt","blob": blob}) - document_infos.append({"displayed_name": "test_2.txt","blob": blob_2}) - ds.upload_documents(document_infos) - - -def test_update_document_with_success(get_api_key_fixture): - API_KEY = get_api_key_fixture - rag = RAGFlow(API_KEY, HOST_ADDRESS) - ds = rag.create_dataset(name="test_update_document") - blob = b"Sample document content for test." - document_infos=[{"displayed_name":"test.txt","blob":blob}] - docs=ds.upload_documents(document_infos) - doc = docs[0] - doc.update({"chunk_method": "manual", "name": "manual.txt"}) - - -def test_download_document_with_success(get_api_key_fixture): - API_KEY = get_api_key_fixture - rag = RAGFlow(API_KEY, HOST_ADDRESS) - ds = rag.create_dataset(name="test_download_document") - blob = b"Sample document content for test." - document_infos=[{"displayed_name": "test_1.txt","blob": blob}] - docs=ds.upload_documents(document_infos) - doc = docs[0] - with open("test_download.txt","wb+") as file: - file.write(doc.download()) - - -def test_list_documents_in_dataset_with_success(get_api_key_fixture): - API_KEY = get_api_key_fixture - rag = RAGFlow(API_KEY, HOST_ADDRESS) - ds = rag.create_dataset(name="test_list_documents") - blob = b"Sample document content for test." - document_infos = [{"displayed_name": "test.txt","blob":blob}] - ds.upload_documents(document_infos) - ds.list_documents(keywords="test", page=1, page_size=12) - - - -def test_delete_documents_in_dataset_with_success(get_api_key_fixture): - API_KEY = get_api_key_fixture - rag = RAGFlow(API_KEY, HOST_ADDRESS) - ds = rag.create_dataset(name="test_delete_documents") - name = "test_delete_documents.txt" - blob = b"Sample document content for test." 
- document_infos=[{"displayed_name": name, "blob": blob}] - docs = ds.upload_documents(document_infos) - ds.delete_documents([docs[0].id]) - - diff --git a/sdk/python/test/test_data/.txt b/sdk/python/test/test_data/.txt deleted file mode 100644 index b0b611ed50e..00000000000 --- a/sdk/python/test/test_data/.txt +++ /dev/null @@ -1,2 +0,0 @@ -hhh -hhh \ No newline at end of file diff --git a/sdk/python/test/test_data/empty.txt b/sdk/python/test/test_data/empty.txt deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/sdk/python/test/test_data/lol.txt b/sdk/python/test/test_data/lol.txt deleted file mode 100644 index 34883d23ace..00000000000 --- a/sdk/python/test/test_data/lol.txt +++ /dev/null @@ -1,3 +0,0 @@ -llll -ooooo -llll \ No newline at end of file diff --git a/sdk/python/test/test_data/story.txt b/sdk/python/test/test_data/story.txt deleted file mode 100644 index d000248c3f2..00000000000 --- a/sdk/python/test/test_data/story.txt +++ /dev/null @@ -1,8 +0,0 @@ -Once upon a time, in a small village nestled at the foot of a towering mountain, lived a young girl named Lily. Lily had a heart as pure as the mountain's snowcaps and a spirit as adventurous as the winding trails that led to its peak. -One day, as Lily was gathering berries in the forest's edge, she stumbled upon an old, weathered map hidden beneath a fallen tree. The map was covered in strange symbols and a single, glowing word: "Treasure." Curiousity piqued, Lily decided to embark on a quest to uncover the mystery of the treasure. -With nothing more than her trusty basket of berries, a few pieces of bread, and the map, Lily set off into the unknown. As she climbed higher and higher into the mountains, the air grew crisp, and the scenery transformed into a breathtaking tapestry of lush greenery and sparkling streams. -Along the way, Lily encountered all sorts of challenges. 
She had to navigate treacherous rivers using fallen logs as bridges, climb steep cliffs with nothing but her agility and determination, and even outsmart a mischievous pack of foxes that tried to lead her astray. But through it all, Lily remained steadfast, her heart filled with hope and a sense of purpose. -Finally, after what seemed like an eternity of trekking, Lily arrived at a hidden valley. At its center stood an ancient tree, its roots entwined with glittering jewels and a chest made of pure gold. This, the map had revealed, was the source of the treasure. -But as Lily approached the chest, she realized that the true treasure was not the riches before her. It was the journey itself—the friendships she had forged with the animals she encountered, the strength she had gained from overcoming obstacles, and the sense of wonder and discovery that filled her heart. -With a smile on her face, Lily gently closed the chest and left it where it was, content in the knowledge that the greatest treasures in life are not always found in gold or jewels. She turned back towards home, her heart full of stories to share and a spirit that had been forever changed by her adventure. -And so, Lily returned to her village, a hero in her own right, with a tale that would be whispered around firesides for generations to come. 
\ No newline at end of file diff --git a/sdk/python/test/test_data/test.txt b/sdk/python/test/test_data/test.txt deleted file mode 100644 index 30fde28b94f..00000000000 --- a/sdk/python/test/test_data/test.txt +++ /dev/null @@ -1,3 +0,0 @@ -test -test -test \ No newline at end of file diff --git a/sdk/python/test/test_data/test1.txt b/sdk/python/test/test_data/test1.txt deleted file mode 100644 index c93f259337e..00000000000 --- a/sdk/python/test/test_data/test1.txt +++ /dev/null @@ -1,4 +0,0 @@ -test1 -test1 -aaaa document args arg -rag document \ No newline at end of file diff --git a/sdk/python/test/test_data/test2.txt b/sdk/python/test/test_data/test2.txt deleted file mode 100644 index 9363f20a31e..00000000000 --- a/sdk/python/test/test_data/test2.txt +++ /dev/null @@ -1,4 +0,0 @@ -test22 -test22 -aaaa document args arg -rag document \ No newline at end of file diff --git a/sdk/python/test/test_data/test3.txt b/sdk/python/test/test_data/test3.txt deleted file mode 100644 index db848e88d02..00000000000 --- a/sdk/python/test/test_data/test3.txt +++ /dev/null @@ -1,4 +0,0 @@ -test3 -test333 -aaaa document args arg -rag document \ No newline at end of file diff --git a/sdk/python/test/test_data/westworld.pdf b/sdk/python/test/test_data/westworld.pdf deleted file mode 100644 index d556794a37c..00000000000 Binary files a/sdk/python/test/test_data/westworld.pdf and /dev/null differ diff --git a/sdk/python/test/common.py b/sdk/python/test/test_frontend_api/common.py similarity index 100% rename from sdk/python/test/common.py rename to sdk/python/test/test_frontend_api/common.py diff --git a/sdk/python/test/test_frontend_api/get_email.py b/sdk/python/test/test_frontend_api/get_email.py new file mode 100644 index 00000000000..df053fa768b --- /dev/null +++ b/sdk/python/test/test_frontend_api/get_email.py @@ -0,0 +1,3 @@ +def test_get_email(get_email): + print(f"\nEmail account:",flush=True) + print(f"{get_email}\n",flush=True) \ No newline at end of file diff --git 
a/sdk/python/test/test_frontend_api/test_dataset.py b/sdk/python/test/test_frontend_api/test_dataset.py new file mode 100644 index 00000000000..de81e6824af --- /dev/null +++ b/sdk/python/test/test_frontend_api/test_dataset.py @@ -0,0 +1,10 @@ +from common import HOST_ADDRESS +import requests +def test_create_dataset(get_auth): + authorization={"Authorization": get_auth} + url = f"{HOST_ADDRESS}/v1/kb/create" + json = {"name":"test_create_dataset"} + res = requests.post(url=url,headers=authorization,json=json) + res = res.json() + assert res.get("code") == 0,f"{res.get('message')}" + diff --git a/sdk/python/test/test_sdk_api/common.py b/sdk/python/test/test_sdk_api/common.py new file mode 100644 index 00000000000..a5bc8ad32bc --- /dev/null +++ b/sdk/python/test/test_sdk_api/common.py @@ -0,0 +1,2 @@ +import os +HOST_ADDRESS=os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380') \ No newline at end of file diff --git a/sdk/python/test/test_sdk_api/get_email.py b/sdk/python/test/test_sdk_api/get_email.py new file mode 100644 index 00000000000..df053fa768b --- /dev/null +++ b/sdk/python/test/test_sdk_api/get_email.py @@ -0,0 +1,3 @@ +def test_get_email(get_email): + print(f"\nEmail account:",flush=True) + print(f"{get_email}\n",flush=True) \ No newline at end of file diff --git a/sdk/python/test/t_chat.py b/sdk/python/test/test_sdk_api/t_chat.py similarity index 91% rename from sdk/python/test/t_chat.py rename to sdk/python/test/test_sdk_api/t_chat.py index cf2f426be50..785081b65d8 100644 --- a/sdk/python/test/t_chat.py +++ b/sdk/python/test/test_sdk_api/t_chat.py @@ -6,7 +6,7 @@ def test_create_chat_with_name(get_api_key_fixture): rag = RAGFlow(API_KEY, HOST_ADDRESS) kb = rag.create_dataset(name="test_create_chat") displayed_name = "ragflow.txt" - with open("ragflow.txt", "rb") as file: + with open("test_data/ragflow.txt", "rb") as file: blob = file.read() document = {"displayed_name":displayed_name,"blob":blob} documents = [] @@ -22,7 +22,7 @@ def 
test_update_chat_with_name(get_api_key_fixture): rag = RAGFlow(API_KEY, HOST_ADDRESS) kb = rag.create_dataset(name="test_update_chat") displayed_name = "ragflow.txt" - with open("ragflow.txt", "rb") as file: + with open("test_data/ragflow.txt", "rb") as file: blob = file.read() document = {"displayed_name": displayed_name, "blob": blob} documents = [] @@ -39,7 +39,7 @@ def test_delete_chats_with_success(get_api_key_fixture): rag = RAGFlow(API_KEY, HOST_ADDRESS) kb = rag.create_dataset(name="test_delete_chat") displayed_name = "ragflow.txt" - with open("ragflow.txt", "rb") as file: + with open("test_data/ragflow.txt", "rb") as file: blob = file.read() document = {"displayed_name": displayed_name, "blob": blob} documents = [] @@ -55,7 +55,7 @@ def test_list_chats_with_success(get_api_key_fixture): rag = RAGFlow(API_KEY, HOST_ADDRESS) kb = rag.create_dataset(name="test_list_chats") displayed_name = "ragflow.txt" - with open("ragflow.txt", "rb") as file: + with open("test_data/ragflow.txt", "rb") as file: blob = file.read() document = {"displayed_name": displayed_name, "blob": blob} documents = [] diff --git a/sdk/python/test/t_chunk.py b/sdk/python/test/test_sdk_api/t_chunk.py similarity index 97% rename from sdk/python/test/t_chunk.py rename to sdk/python/test/test_sdk_api/t_chunk.py index 5c2b5f205be..13b4c06d781 100644 --- a/sdk/python/test/t_chunk.py +++ b/sdk/python/test/test_sdk_api/t_chunk.py @@ -7,7 +7,7 @@ def test_parse_document_with_txt(get_api_key_fixture): rag = RAGFlow(API_KEY, HOST_ADDRESS) ds = rag.create_dataset(name="test_parse_document") name = 'ragflow_test.txt' - with open("test_data/ragflow_test.txt","rb") as file : + with open("test_data/ragflow_test.txt", "rb") as file : blob = file.read() docs = ds.upload_documents([{"displayed_name": name, "blob": blob}]) doc = docs[0] @@ -26,7 +26,7 @@ def test_parse_and_cancel_document(get_api_key_fixture): rag = RAGFlow(API_KEY, HOST_ADDRESS) ds = rag.create_dataset(name="test_parse_and_cancel_document") 
name = 'ragflow_test.txt' - with open("test_data/ragflow_test.txt","rb") as file : + with open("test_data/ragflow_test.txt", "rb") as file : blob = file.read() docs=ds.upload_documents([{"displayed_name": name, "blob": blob}]) doc = docs[0] @@ -40,7 +40,7 @@ def test_bulk_parse_documents(get_api_key_fixture): API_KEY = get_api_key_fixture rag = RAGFlow(API_KEY, HOST_ADDRESS) ds = rag.create_dataset(name="test_bulk_parse_and_cancel_documents") - with open("ragflow.txt","rb") as file: + with open("test_data/ragflow.txt", "rb") as file: blob = file.read() documents = [ {'displayed_name': 'test1.txt', 'blob': blob}, diff --git a/sdk/python/test/t_dataset.py b/sdk/python/test/test_sdk_api/t_dataset.py similarity index 100% rename from sdk/python/test/t_dataset.py rename to sdk/python/test/test_sdk_api/t_dataset.py diff --git a/sdk/python/test/test_sdk_api/t_document.py b/sdk/python/test/test_sdk_api/t_document.py new file mode 100644 index 00000000000..581e04032a3 --- /dev/null +++ b/sdk/python/test/test_sdk_api/t_document.py @@ -0,0 +1,166 @@ +from ragflow_sdk import RAGFlow +from common import HOST_ADDRESS +import pytest + +def test_upload_document_with_success(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_upload_document") + blob = b"Sample document content for test." + with open("test_data/ragflow.txt", "rb") as file: + blob_2=file.read() + document_infos = [] + document_infos.append({"displayed_name": "test_1.txt","blob": blob}) + document_infos.append({"displayed_name": "test_2.txt","blob": blob_2}) + ds.upload_documents(document_infos) + + +def test_update_document_with_success(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_update_document") + blob = b"Sample document content for test." 
+ document_infos=[{"displayed_name":"test.txt","blob":blob}] + docs=ds.upload_documents(document_infos) + doc = docs[0] + doc.update({"chunk_method": "manual", "name": "manual.txt"}) + + +def test_download_document_with_success(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_download_document") + blob = b"Sample document content for test." + document_infos=[{"displayed_name": "test_1.txt","blob": blob}] + docs=ds.upload_documents(document_infos) + doc = docs[0] + with open("test_download.txt","wb+") as file: + file.write(doc.download()) + + +def test_list_documents_in_dataset_with_success(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_list_documents") + blob = b"Sample document content for test." + document_infos = [{"displayed_name": "test.txt","blob":blob}] + ds.upload_documents(document_infos) + ds.list_documents(keywords="test", page=1, page_size=12) + + +def test_delete_documents_in_dataset_with_success(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_delete_documents") + name = "test_delete_documents.txt" + blob = b"Sample document content for test." + document_infos=[{"displayed_name": name, "blob": blob}] + docs = ds.upload_documents(document_infos) + ds.delete_documents([docs[0].id]) + +# upload and parse the document with different in different parse method. 
+def test_upload_and_parse_pdf_documents_with_general_parse_method(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_pdf_document") + with open("test_data/test.pdf", "rb") as file: + blob=file.read() + document_infos = [{"displayed_name": "test.pdf","blob": blob}] + docs=ds.upload_documents(document_infos) + doc = docs[0] + ds.async_parse_documents([doc.id]) + +def test_upload_and_parse_docx_documents_with_general_parse_method(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_docx_document") + with open("test_data/test.docx", "rb") as file: + blob=file.read() + document_infos = [{"displayed_name": "test.docx","blob": blob}] + docs=ds.upload_documents(document_infos) + doc = docs[0] + ds.async_parse_documents([doc.id]) +def test_upload_and_parse_excel_documents_with_general_parse_method(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_excel_document") + with open("test_data/test.xlsx", "rb") as file: + blob=file.read() + document_infos = [{"displayed_name": "test.xlsx","blob": blob}] + docs=ds.upload_documents(document_infos) + doc = docs[0] + ds.async_parse_documents([doc.id]) +def test_upload_and_parse_ppt_documents_with_general_parse_method(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_ppt_document") + with open("test_data/test.ppt", "rb") as file: + blob=file.read() + document_infos = [{"displayed_name": "test.ppt","blob": blob}] + docs=ds.upload_documents(document_infos) + doc = docs[0] + ds.async_parse_documents([doc.id]) +def test_upload_and_parse_image_documents_with_general_parse_method(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_image_document") + with 
open("test_data/test.jpg", "rb") as file: + blob=file.read() + document_infos = [{"displayed_name": "test.jpg","blob": blob}] + docs=ds.upload_documents(document_infos) + doc = docs[0] + ds.async_parse_documents([doc.id]) +def test_upload_and_parse_txt_documents_with_general_parse_method(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_txt_document") + with open("test_data/test.txt", "rb") as file: + blob=file.read() + document_infos = [{"displayed_name": "test.txt","blob": blob}] + docs=ds.upload_documents(document_infos) + doc = docs[0] + ds.async_parse_documents([doc.id]) +def test_upload_and_parse_md_documents_with_general_parse_method(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_md_document") + with open("test_data/test.md", "rb") as file: + blob=file.read() + document_infos = [{"displayed_name": "test.md","blob": blob}] + docs=ds.upload_documents(document_infos) + doc = docs[0] + ds.async_parse_documents([doc.id]) + +def test_upload_and_parse_json_documents_with_general_parse_method(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_json_document") + with open("test_data/test.json", "rb") as file: + blob=file.read() + document_infos = [{"displayed_name": "test.json","blob": blob}] + docs=ds.upload_documents(document_infos) + doc = docs[0] + ds.async_parse_documents([doc.id]) + +@pytest.mark.skip(reason="") +def test_upload_and_parse_eml_documents_with_general_parse_method(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_eml_document") + with open("test_data/test.eml", "rb") as file: + blob=file.read() + document_infos = [{"displayed_name": "test.eml","blob": blob}] + docs=ds.upload_documents(document_infos) + doc = docs[0] + 
ds.async_parse_documents([doc.id]) + +def test_upload_and_parse_html_documents_with_general_parse_method(get_api_key_fixture): + API_KEY = get_api_key_fixture + rag = RAGFlow(API_KEY, HOST_ADDRESS) + ds = rag.create_dataset(name="test_html_document") + with open("test_data/test.html", "rb") as file: + blob=file.read() + document_infos = [{"displayed_name": "test.html","blob": blob}] + docs=ds.upload_documents(document_infos) + doc = docs[0] + ds.async_parse_documents([doc.id]) \ No newline at end of file diff --git a/sdk/python/test/t_session.py b/sdk/python/test/test_sdk_api/t_session.py similarity index 93% rename from sdk/python/test/t_session.py rename to sdk/python/test/test_sdk_api/t_session.py index 45984f5aa93..2e46669cfbe 100644 --- a/sdk/python/test/t_session.py +++ b/sdk/python/test/test_sdk_api/t_session.py @@ -8,7 +8,7 @@ def test_create_session_with_success(get_api_key_fixture): rag = RAGFlow(API_KEY, HOST_ADDRESS) kb = rag.create_dataset(name="test_create_session") displayed_name = "ragflow.txt" - with open("ragflow.txt", "rb") as file: + with open("test_data/ragflow.txt", "rb") as file: blob = file.read() document = {"displayed_name":displayed_name,"blob":blob} documents = [] @@ -25,7 +25,7 @@ def test_create_conversation_with_success(get_api_key_fixture): rag = RAGFlow(API_KEY, HOST_ADDRESS) kb = rag.create_dataset(name="test_create_conversation") displayed_name = "ragflow.txt" - with open("ragflow.txt", "rb") as file: + with open("test_data/ragflow.txt", "rb") as file: blob = file.read() document = {"displayed_name": displayed_name, "blob": blob} documents = [] @@ -47,7 +47,7 @@ def test_delete_sessions_with_success(get_api_key_fixture): rag = RAGFlow(API_KEY, HOST_ADDRESS) kb = rag.create_dataset(name="test_delete_session") displayed_name = "ragflow.txt" - with open("ragflow.txt", "rb") as file: + with open("test_data/ragflow.txt", "rb") as file: blob = file.read() document = {"displayed_name":displayed_name,"blob":blob} documents = [] @@ -65,7 
+65,7 @@ def test_update_session_with_name(get_api_key_fixture): rag = RAGFlow(API_KEY, HOST_ADDRESS) kb = rag.create_dataset(name="test_update_session") displayed_name = "ragflow.txt" - with open("ragflow.txt", "rb") as file: + with open("test_data/ragflow.txt", "rb") as file: blob = file.read() document = {"displayed_name": displayed_name, "blob": blob} documents = [] @@ -83,7 +83,7 @@ def test_list_sessions_with_success(get_api_key_fixture): rag = RAGFlow(API_KEY, HOST_ADDRESS) kb = rag.create_dataset(name="test_list_session") displayed_name = "ragflow.txt" - with open("ragflow.txt", "rb") as file: + with open("test_data/ragflow.txt", "rb") as file: blob = file.read() document = {"displayed_name":displayed_name,"blob":blob} documents = [] diff --git a/sdk/python/test/ragflow.txt b/sdk/python/test/test_sdk_api/test_data/ragflow.txt similarity index 100% rename from sdk/python/test/ragflow.txt rename to sdk/python/test/test_sdk_api/test_data/ragflow.txt diff --git a/sdk/python/test/test_data/ragflow_test.txt b/sdk/python/test/test_sdk_api/test_data/ragflow_test.txt similarity index 100% rename from sdk/python/test/test_data/ragflow_test.txt rename to sdk/python/test/test_sdk_api/test_data/ragflow_test.txt diff --git a/sdk/python/test/test_sdk_api/test_data/test.docx b/sdk/python/test/test_sdk_api/test_data/test.docx new file mode 100644 index 00000000000..2eba99d1cb5 Binary files /dev/null and b/sdk/python/test/test_sdk_api/test_data/test.docx differ diff --git a/sdk/python/test/test_sdk_api/test_data/test.html b/sdk/python/test/test_sdk_api/test_data/test.html new file mode 100644 index 00000000000..ba3cded0338 --- /dev/null +++ b/sdk/python/test/test_sdk_api/test_data/test.html @@ -0,0 +1,148 @@ + + + + + + + + Sample HTML 1 + + + + Sample HTML 1 +

Minime vero, inquit ille, consentit.

+ +

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Inscite autem medicinae et gubernationis ultimum cum ultimo sapientiae comparatur. Cur igitur, cum de re conveniat, non malumus usitate loqui?

+ +
    +
  1. Si qua in iis corrigere voluit, deteriora fecit.
  2. +
  3. At quicum ioca seria, ut dicitur, quicum arcana, quicum occulta omnia?
  4. +
  5. An dolor longissimus quisque miserrimus, voluptatem non optabiliorem diuturnitas facit?
  6. +
  7. Multoque hoc melius nos veriusque quam Stoici.
  8. +
  9. Stuprata per vim Lucretia a regis filio testata civis se ipsa interemit.
  10. +
  11. Ego vero isti, inquam, permitto.
  12. +
+ + +

Graecum enim hunc versum nostis omnes-: Suavis laborum est praeteritorum memoria. Qui enim existimabit posse se miserum esse beatus non erit. Si qua in iis corrigere voluit, deteriora fecit. Si qua in iis corrigere voluit, deteriora fecit. Dic in quovis conventu te omnia facere, ne doleas. Tu quidem reddes;

+ +
    +
  • Duo Reges: constructio interrete.
  • +
  • Contineo me ab exemplis.
  • +
  • Quo plebiscito decreta a senatu est consuli quaestio Cn.
  • +
  • Quicquid porro animo cernimus, id omne oritur a sensibus;
  • +
  • Eam si varietatem diceres, intellegerem, ut etiam non dicente te intellego;
  • +
  • Qua ex cognitione facilior facta est investigatio rerum occultissimarum.
  • +
+ + +
+ Me igitur ipsum ames oportet, non mea, si veri amici futuri sumus. +
+ + + \ No newline at end of file diff --git a/sdk/python/test/test_sdk_api/test_data/test.jpg b/sdk/python/test/test_sdk_api/test_data/test.jpg new file mode 100644 index 00000000000..ff2d4ebf8ec Binary files /dev/null and b/sdk/python/test/test_sdk_api/test_data/test.jpg differ diff --git a/sdk/python/test/test_sdk_api/test_data/test.json b/sdk/python/test/test_sdk_api/test_data/test.json new file mode 100644 index 00000000000..a0c8c82e954 --- /dev/null +++ b/sdk/python/test/test_sdk_api/test_data/test.json @@ -0,0 +1,107 @@ +{ + "单车": [ + "自行车" + ], + "青禾服装": [ + "青禾服饰" + ], + "救济灾民": [ + "救助", + "灾民救济", + "赈济" + ], + "左移": [], + "低速": [], + "雨果网": [], + "钢小二": [ + "成立于2013年,位于江苏省无锡市,是一家以从事研究和试验发展为主的企业" + ], + "第五项": [ + "5项" + ], + "铸排机": [ + "机排", + "排铸机", + "排铸" + ], + "金淳高分子": [], + "麦门冬汤": [], + "错位": [], + "佰特吉姆": [], + "楼体": [], + "展美科技": [ + "美展" + ], + "中寮": [], + "贪官汙吏": [ + "...", + "贪吏", + "贪官污吏" + ], + "掩蔽部": [ + "掩 蔽 部" + ], + "海宏智能": [], + "中寰": [], + "万次": [], + "领星资本": [ + "星领" + ], + "肯讯": [], + "坎肩": [], + "爱农人": [], + "易美餐": [], + "寸丝半粟": [], + "罗丹萍": [], + "转导物": [], + "泊寓": [], + "万欧": [ + "欧万" + ], + "友聚惠": [ + "友惠", + "惠友" + ], + "舞牙弄爪": [ + ":形容凶猛的样子,比喻威胁、恐吓", + "原形容猛兽的凶相,后常用来比喻猖狂凶恶的样子", + "成语解释:原形容猛兽的凶相,后常用来比喻猖狂凶恶的样子", + "原形容猛兽的凶相,后常用来比喻猖狂(好工具hao86.com", + "牙舞爪", + "形容猛兽凶恶可怕。也比喻猖狂凶恶", + "舞爪" + ], + "上海致上": [ + "上海上", + "上海市" + ], + "迪因加": [], + "李正茂": [], + "君来投": [], + "双掌空": [ + "双掌 空", + "空掌", + "两手空空" + ], + "浩石": [ + "石浩", + "皓石" + ], + "云阅文学": [], + "阿斯帕": [], + "中导": [], + "以诚相待": [], + "中融金服": [], + "尚股网": [], + "叶立钦": [ + "叶利钦" + ], + "新信钱包": [ + "信信" + ], + "赛苏投资": [ + "投资者" + ], + "售价": [], + "帮医网": [] +} \ No newline at end of file diff --git a/sdk/python/test/test_sdk_api/test_data/test.md b/sdk/python/test/test_sdk_api/test_data/test.md new file mode 100644 index 00000000000..0639b98ba1c --- /dev/null +++ b/sdk/python/test/test_sdk_api/test_data/test.md @@ -0,0 +1,21 @@ +Quod equidem non reprehendo; +Lorem ipsum dolor sit 
amet, consectetur adipiscing elit. Quibus natura iure responderit non esse verum aliunde finem beate vivendi, a se principia rei gerendae peti; Quae enim adhuc protulisti, popularia sunt, ego autem a te elegantiora desidero. Duo Reges: constructio interrete. Tum Lucius: Mihi vero ista valde probata sunt, quod item fratri puto. Bestiarum vero nullum iudicium puto. Nihil enim iam habes, quod ad corpus referas; Deinde prima illa, quae in congressu solemus: Quid tu, inquit, huc? Et homini, qui ceteris animantibus plurimum praestat, praecipue a natura nihil datum esse dicemus? + +Iam id ipsum absurdum, maximum malum neglegi. Quod ea non occurrentia fingunt, vincunt Aristonem; Atqui perspicuum est hominem e corpore animoque constare, cum primae sint animi partes, secundae corporis. Fieri, inquam, Triari, nullo pacto potest, ut non dicas, quid non probes eius, a quo dissentias. Equidem e Cn. An dubium est, quin virtus ita maximam partem optineat in rebus humanis, ut reliquas obruat? + +Quis istum dolorem timet? +Summus dolor plures dies manere non potest? Dicet pro me ipsa virtus nec dubitabit isti vestro beato M. Tubulum fuisse, qua illum, cuius is condemnatus est rogatione, P. Quod si ita sit, cur opera philosophiae sit danda nescio. + +Ex eorum enim scriptis et institutis cum omnis doctrina liberalis, omnis historia. +Quod si ita est, sequitur id ipsum, quod te velle video, omnes semper beatos esse sapientes. Cum enim fertur quasi torrens oratio, quamvis multa cuiusque modi rapiat, nihil tamen teneas, nihil apprehendas, nusquam orationem rapidam coerceas. Ita redarguitur ipse a sese, convincunturque scripta eius probitate ipsius ac moribus. At quanta conantur! Mundum hunc omnem oppidum esse nostrum! Incendi igitur eos, qui audiunt, vides. Vide, ne magis, inquam, tuum fuerit, cum re idem tibi, quod mihi, videretur, non nova te rebus nomina inponere. 
Qui-vere falsone, quaerere mittimus-dicitur oculis se privasse; Si ista mala sunt, in quae potest incidere sapiens, sapientem esse non esse ad beate vivendum satis. At vero si ad vitem sensus accesserit, ut appetitum quendam habeat et per se ipsa moveatur, quid facturam putas? + +Quem si tenueris, non modo meum Ciceronem, sed etiam me ipsum abducas licebit. +Stulti autem malorum memoria torquentur, sapientes bona praeterita grata recordatione renovata delectant. +Esse enim quam vellet iniquus iustus poterat inpune. +Quae autem natura suae primae institutionis oblita est? +Verum tamen cum de rebus grandioribus dicas, ipsae res verba rapiunt; +Hoc est non modo cor non habere, sed ne palatum quidem. +Voluptatem cum summum bonum diceret, primum in eo ipso parum vidit, deinde hoc quoque alienum; Sed tu istuc dixti bene Latine, parum plane. Nam haec ipsa mihi erunt in promptu, quae modo audivi, nec ante aggrediar, quam te ab istis, quos dicis, instructum videro. Fatebuntur Stoici haec omnia dicta esse praeclare, neque eam causam Zenoni desciscendi fuisse. Non autem hoc: igitur ne illud quidem. Ratio quidem vestra sic cogit. Cum audissem Antiochum, Brute, ut solebam, cum M. An quod ita callida est, ut optime possit architectari voluptates? + +Idemne, quod iucunde? +Haec mihi videtur delicatior, ut ita dicam, molliorque ratio, quam virtutis vis gravitasque postulat. Sed quoniam et advesperascit et mihi ad villam revertendum est, nunc quidem hactenus; Cuius ad naturam apta ratio vera illa et summa lex a philosophis dicitur. Neque solum ea communia, verum etiam paria esse dixerunt. Sed nunc, quod agimus; A mene tu? 
\ No newline at end of file diff --git a/sdk/python/test/test_sdk_api/test_data/test.pdf b/sdk/python/test/test_sdk_api/test_data/test.pdf new file mode 100644 index 00000000000..72d0d21d38f Binary files /dev/null and b/sdk/python/test/test_sdk_api/test_data/test.pdf differ diff --git a/sdk/python/test/test_sdk_api/test_data/test.ppt b/sdk/python/test/test_sdk_api/test_data/test.ppt new file mode 100644 index 00000000000..7a3ed0d04c3 Binary files /dev/null and b/sdk/python/test/test_sdk_api/test_data/test.ppt differ diff --git a/sdk/python/test/test_sdk_api/test_data/test.txt b/sdk/python/test/test_sdk_api/test_data/test.txt new file mode 100644 index 00000000000..0639b98ba1c --- /dev/null +++ b/sdk/python/test/test_sdk_api/test_data/test.txt @@ -0,0 +1,21 @@ +Quod equidem non reprehendo; +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quibus natura iure responderit non esse verum aliunde finem beate vivendi, a se principia rei gerendae peti; Quae enim adhuc protulisti, popularia sunt, ego autem a te elegantiora desidero. Duo Reges: constructio interrete. Tum Lucius: Mihi vero ista valde probata sunt, quod item fratri puto. Bestiarum vero nullum iudicium puto. Nihil enim iam habes, quod ad corpus referas; Deinde prima illa, quae in congressu solemus: Quid tu, inquit, huc? Et homini, qui ceteris animantibus plurimum praestat, praecipue a natura nihil datum esse dicemus? + +Iam id ipsum absurdum, maximum malum neglegi. Quod ea non occurrentia fingunt, vincunt Aristonem; Atqui perspicuum est hominem e corpore animoque constare, cum primae sint animi partes, secundae corporis. Fieri, inquam, Triari, nullo pacto potest, ut non dicas, quid non probes eius, a quo dissentias. Equidem e Cn. An dubium est, quin virtus ita maximam partem optineat in rebus humanis, ut reliquas obruat? + +Quis istum dolorem timet? +Summus dolor plures dies manere non potest? Dicet pro me ipsa virtus nec dubitabit isti vestro beato M. 
Tubulum fuisse, qua illum, cuius is condemnatus est rogatione, P. Quod si ita sit, cur opera philosophiae sit danda nescio. + +Ex eorum enim scriptis et institutis cum omnis doctrina liberalis, omnis historia. +Quod si ita est, sequitur id ipsum, quod te velle video, omnes semper beatos esse sapientes. Cum enim fertur quasi torrens oratio, quamvis multa cuiusque modi rapiat, nihil tamen teneas, nihil apprehendas, nusquam orationem rapidam coerceas. Ita redarguitur ipse a sese, convincunturque scripta eius probitate ipsius ac moribus. At quanta conantur! Mundum hunc omnem oppidum esse nostrum! Incendi igitur eos, qui audiunt, vides. Vide, ne magis, inquam, tuum fuerit, cum re idem tibi, quod mihi, videretur, non nova te rebus nomina inponere. Qui-vere falsone, quaerere mittimus-dicitur oculis se privasse; Si ista mala sunt, in quae potest incidere sapiens, sapientem esse non esse ad beate vivendum satis. At vero si ad vitem sensus accesserit, ut appetitum quendam habeat et per se ipsa moveatur, quid facturam putas? + +Quem si tenueris, non modo meum Ciceronem, sed etiam me ipsum abducas licebit. +Stulti autem malorum memoria torquentur, sapientes bona praeterita grata recordatione renovata delectant. +Esse enim quam vellet iniquus iustus poterat inpune. +Quae autem natura suae primae institutionis oblita est? +Verum tamen cum de rebus grandioribus dicas, ipsae res verba rapiunt; +Hoc est non modo cor non habere, sed ne palatum quidem. +Voluptatem cum summum bonum diceret, primum in eo ipso parum vidit, deinde hoc quoque alienum; Sed tu istuc dixti bene Latine, parum plane. Nam haec ipsa mihi erunt in promptu, quae modo audivi, nec ante aggrediar, quam te ab istis, quos dicis, instructum videro. Fatebuntur Stoici haec omnia dicta esse praeclare, neque eam causam Zenoni desciscendi fuisse. Non autem hoc: igitur ne illud quidem. Ratio quidem vestra sic cogit. Cum audissem Antiochum, Brute, ut solebam, cum M. 
An quod ita callida est, ut optime possit architectari voluptates? + +Idemne, quod iucunde? +Haec mihi videtur delicatior, ut ita dicam, molliorque ratio, quam virtutis vis gravitasque postulat. Sed quoniam et advesperascit et mihi ad villam revertendum est, nunc quidem hactenus; Cuius ad naturam apta ratio vera illa et summa lex a philosophis dicitur. Neque solum ea communia, verum etiam paria esse dixerunt. Sed nunc, quod agimus; A mene tu? \ No newline at end of file diff --git a/sdk/python/test/test_sdk_api/test_data/test.xlsx b/sdk/python/test/test_sdk_api/test_data/test.xlsx new file mode 100644 index 00000000000..dcde2d3dfb4 Binary files /dev/null and b/sdk/python/test/test_sdk_api/test_data/test.xlsx differ diff --git a/web/.npmrc b/web/.npmrc index 8f46dd2575d..5677efaaae6 100644 --- a/web/.npmrc +++ b/web/.npmrc @@ -1,2 +1,2 @@ +engine-strict=true registry=https://registry.npmmirror.com/ - diff --git a/web/package-lock.json b/web/package-lock.json index d071d5082f8..99591b6337c 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -106,6 +106,9 @@ "ts-node": "^10.9.2", "typescript": "^5.0.3", "umi-plugin-icons": "^0.1.1" + }, + "engines": { + "node": ">=18.20.4" } }, "node_modules/@aashutoshrathi/word-wrap": { diff --git a/web/package.json b/web/package.json index f54dd04c126..d4ed83b1a1e 100644 --- a/web/package.json +++ b/web/package.json @@ -117,5 +117,8 @@ "ts-node": "^10.9.2", "typescript": "^5.0.3", "umi-plugin-icons": "^0.1.1" + }, + "engines": { + "node": ">=18.20.4" } } diff --git a/web/src/assets/svg/template.svg b/web/src/assets/svg/template.svg new file mode 100644 index 00000000000..d3871d27a86 --- /dev/null +++ b/web/src/assets/svg/template.svg @@ -0,0 +1,6 @@ + + + \ No newline at end of file diff --git a/web/src/components/editable-cell.tsx b/web/src/components/editable-cell.tsx index f8e367ff7b3..4a960d4711c 100644 --- a/web/src/components/editable-cell.tsx +++ b/web/src/components/editable-cell.tsx @@ -78,7 +78,7 @@ export 
const EditableCell: React.FC = ({ if (editable) { childNode = editing ? ( {t('howUseId')} diff --git a/web/src/pages/flow/canvas/index.tsx b/web/src/pages/flow/canvas/index.tsx index 0f20b894b3b..afa4c1e0699 100644 --- a/web/src/pages/flow/canvas/index.tsx +++ b/web/src/pages/flow/canvas/index.tsx @@ -35,6 +35,7 @@ import { RelevantNode } from './node/relevant-node'; import { RetrievalNode } from './node/retrieval-node'; import { RewriteNode } from './node/rewrite-node'; import { SwitchNode } from './node/switch-node'; +import { TemplateNode } from './node/template-node'; const nodeTypes = { ragNode: RagNode, @@ -50,6 +51,7 @@ const nodeTypes = { rewriteNode: RewriteNode, keywordNode: KeywordNode, invokeNode: InvokeNode, + templateNode: TemplateNode, }; const edgeTypes = { diff --git a/web/src/pages/flow/canvas/node/popover.tsx b/web/src/pages/flow/canvas/node/popover.tsx index f943c2120ed..f9cb70bd54b 100644 --- a/web/src/pages/flow/canvas/node/popover.tsx +++ b/web/src/pages/flow/canvas/node/popover.tsx @@ -35,10 +35,10 @@ export function NextNodePopover({ children, nodeId, name }: IProps) { const inputs: Array<{ component_id: string; content: string }> = get( component, - ['obj', 'params', 'inputs'], + ['obj', 'inputs'], [], ); - const output = get(component, ['obj', 'params', 'output'], {}); + const output = get(component, ['obj', 'output'], {}); const { replacedOutput } = useReplaceIdWithText(output); const stopPropagation: MouseEventHandler = useCallback((e) => { e.stopPropagation(); diff --git a/web/src/pages/flow/canvas/node/template-node.tsx b/web/src/pages/flow/canvas/node/template-node.tsx new file mode 100644 index 00000000000..65fcbe471d3 --- /dev/null +++ b/web/src/pages/flow/canvas/node/template-node.tsx @@ -0,0 +1,68 @@ +import { Flex } from 'antd'; +import classNames from 'classnames'; +import { get } from 'lodash'; +import { Handle, NodeProps, Position } from 'reactflow'; +import { useGetComponentLabelByValue } from '../../hooks'; +import { 
IGenerateParameter, NodeData } from '../../interface'; +import { LeftHandleStyle, RightHandleStyle } from './handle-icon'; +import NodeHeader from './node-header'; + +import styles from './index.less'; + +export function TemplateNode({ + id, + data, + isConnectable = true, + selected, +}: NodeProps) { + const parameters: IGenerateParameter[] = get(data, 'form.parameters', []); + const getLabel = useGetComponentLabelByValue(id); + + return ( +
+ + + + + + + {parameters.map((x) => ( + + + + {getLabel(x.component_id)} + + + ))} + +
+ ); +} diff --git a/web/src/pages/flow/constant.tsx b/web/src/pages/flow/constant.tsx index 132fbd1101b..e06398f63e7 100644 --- a/web/src/pages/flow/constant.tsx +++ b/web/src/pages/flow/constant.tsx @@ -19,6 +19,7 @@ import { ReactComponent as NoteIcon } from '@/assets/svg/note.svg'; import { ReactComponent as PubMedIcon } from '@/assets/svg/pubmed.svg'; import { ReactComponent as QWeatherIcon } from '@/assets/svg/qweather.svg'; import { ReactComponent as SwitchIcon } from '@/assets/svg/switch.svg'; +import { ReactComponent as TemplateIcon } from '@/assets/svg/template.svg'; import { ReactComponent as TuShareIcon } from '@/assets/svg/tushare.svg'; import { ReactComponent as WenCaiIcon } from '@/assets/svg/wencai.svg'; import { ReactComponent as WikipediaIcon } from '@/assets/svg/wikipedia.svg'; @@ -85,6 +86,7 @@ export enum Operator { Note = 'Note', Crawler = 'Crawler', Invoke = 'Invoke', + Template = 'Template', } export const CommonOperatorList = Object.values(Operator).filter( @@ -124,6 +126,7 @@ export const operatorIconMap = { [Operator.Note]: NoteIcon, [Operator.Crawler]: CrawlerIcon, [Operator.Invoke]: InvokeIcon, + [Operator.Template]: TemplateIcon, }; export const operatorMap: Record< @@ -253,6 +256,9 @@ export const operatorMap: Record< [Operator.Invoke]: { backgroundColor: '#dee0e2', }, + [Operator.Template]: { + backgroundColor: '#dee0e2', + }, }; export const componentMenuList = [ @@ -286,6 +292,9 @@ export const componentMenuList = [ { name: Operator.Concentrator, }, + { + name: Operator.Template, + }, { name: Operator.Note, }, @@ -566,6 +575,11 @@ export const initialInvokeValues = { clean_html: false, }; +export const initialTemplateValues = { + content: '', + parameters: [], +}; + export const CategorizeAnchorPointPositions = [ { top: 1, right: 34 }, { top: 8, right: 18 }, @@ -645,6 +659,7 @@ export const RestrictedUpstreamMap = { [Operator.Crawler]: [Operator.Begin], [Operator.Note]: [], [Operator.Invoke]: [Operator.Begin], + 
[Operator.Template]: [Operator.Begin, Operator.Relevant], }; export const NodeMap = { @@ -680,6 +695,7 @@ export const NodeMap = { [Operator.Note]: 'noteNode', [Operator.Crawler]: 'ragNode', [Operator.Invoke]: 'invokeNode', + [Operator.Template]: 'templateNode', }; export const LanguageOptions = [ diff --git a/web/src/pages/flow/flow-drawer/index.tsx b/web/src/pages/flow/flow-drawer/index.tsx index 945ca18fd8d..5a2c7e40e05 100644 --- a/web/src/pages/flow/flow-drawer/index.tsx +++ b/web/src/pages/flow/flow-drawer/index.tsx @@ -39,6 +39,7 @@ import OperatorIcon from '../operator-icon'; import { CloseOutlined } from '@ant-design/icons'; import { lowerFirst } from 'lodash'; +import TemplateForm from '../form/template-form'; import { getDrawerWidth } from '../utils'; import styles from './index.less'; @@ -79,6 +80,7 @@ const FormMap = { [Operator.Invoke]: InvokeForm, [Operator.Concentrator]: () => <>, [Operator.Note]: () => <>, + [Operator.Template]: TemplateForm, }; const EmptyContent = () =>
; diff --git a/web/src/pages/flow/flow-id-modal/index.tsx b/web/src/pages/flow/flow-id-modal/index.tsx index fd9dbea4734..7c54a3c448b 100644 --- a/web/src/pages/flow/flow-id-modal/index.tsx +++ b/web/src/pages/flow/flow-id-modal/index.tsx @@ -24,7 +24,7 @@ const FlowIdModal = ({ hideModal }: IModalProps) => { {id} {t('howUseId')} diff --git a/web/src/pages/flow/form/generate-form/dynamic-parameters.tsx b/web/src/pages/flow/form/generate-form/dynamic-parameters.tsx index 427ce04ab0a..c5220ebf6a7 100644 --- a/web/src/pages/flow/form/generate-form/dynamic-parameters.tsx +++ b/web/src/pages/flow/form/generate-form/dynamic-parameters.tsx @@ -36,6 +36,7 @@ const DynamicParameters = ({ nodeId }: IProps) => { title: t('key'), dataIndex: 'key', key: 'key', + width: '40%', onCell: (record: IGenerateParameter) => ({ record, editable: true, @@ -49,6 +50,7 @@ const DynamicParameters = ({ nodeId }: IProps) => { dataIndex: 'component_id', key: 'component_id', align: 'center', + width: '40%', render(text, record) { return (