diff --git a/comps/dataprep/milvus/requirements.txt b/comps/dataprep/milvus/requirements.txt index 48b8be1d9..53e2e73ca 100644 --- a/comps/dataprep/milvus/requirements.txt +++ b/comps/dataprep/milvus/requirements.txt @@ -25,5 +25,5 @@ python-pptx sentence_transformers shortuuid tiktoken -unstructured[all-docs]==0.11.5 +unstructured[all-docs]==0.15.7 uvicorn diff --git a/comps/dataprep/pgvector/langchain/requirements.txt b/comps/dataprep/pgvector/langchain/requirements.txt index b5b4f168b..e89c3947a 100644 --- a/comps/dataprep/pgvector/langchain/requirements.txt +++ b/comps/dataprep/pgvector/langchain/requirements.txt @@ -26,6 +26,6 @@ python-pptx sentence_transformers shortuuid tiktoken -unstructured[all-docs]==0.11.5 +unstructured[all-docs]==0.15.7 uvicorn diff --git a/comps/dataprep/pinecone/requirements.txt b/comps/dataprep/pinecone/requirements.txt index 06ab12ecd..a48c6343f 100644 --- a/comps/dataprep/pinecone/requirements.txt +++ b/comps/dataprep/pinecone/requirements.txt @@ -26,5 +26,5 @@ python-docx python-pptx sentence_transformers shortuuid -unstructured[all-docs]==0.11.5 +unstructured[all-docs]==0.15.7 uvicorn diff --git a/comps/dataprep/qdrant/requirements.txt b/comps/dataprep/qdrant/requirements.txt index e5bcf80b3..0c371dd15 100644 --- a/comps/dataprep/qdrant/requirements.txt +++ b/comps/dataprep/qdrant/requirements.txt @@ -23,5 +23,5 @@ python-pptx qdrant-client sentence_transformers shortuuid -unstructured[all-docs]==0.11.5 +unstructured[all-docs]==0.15.7 uvicorn diff --git a/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml b/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml index 0ef8a1f1a..4ac5c871c 100644 --- a/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml +++ b/comps/dataprep/redis/langchain/docker/docker-compose-dataprep-redis.yaml @@ -32,8 +32,6 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - REDIS_HOST: ${REDIS_HOST} - REDIS_PORT: ${REDIS_PORT} REDIS_URL: ${REDIS_URL} INDEX_NAME: ${INDEX_NAME} TEI_ENDPOINT: ${TEI_ENDPOINT} diff --git a/comps/dataprep/redis/langchain/requirements.txt b/comps/dataprep/redis/langchain/requirements.txt index 284b9379b..a70486a10 100644 --- a/comps/dataprep/redis/langchain/requirements.txt +++ b/comps/dataprep/redis/langchain/requirements.txt @@ -25,5 +25,5 @@ python-pptx redis sentence_transformers shortuuid -unstructured[all-docs]==0.11.5 +unstructured[all-docs]==0.15.7 uvicorn diff --git a/comps/dataprep/redis/langchain_ray/requirements.txt b/comps/dataprep/redis/langchain_ray/requirements.txt index a77ba5c4d..2ac816fc2 100644 --- a/comps/dataprep/redis/langchain_ray/requirements.txt +++ b/comps/dataprep/redis/langchain_ray/requirements.txt @@ -24,5 +24,6 @@ ray redis sentence_transformers shortuuid +unstructured[all-docs]==0.15.7 uvicorn virtualenv diff --git a/comps/dataprep/utils.py b/comps/dataprep/utils.py index b300af428..ac8352fa4 100644 --- a/comps/dataprep/utils.py +++ b/comps/dataprep/utils.py @@ -12,6 +12,7 @@ import shutil import signal import subprocess +import tempfile import timeit import unicodedata import urllib.parse @@ -187,8 +188,7 @@ def load_docx(docx_path): if isinstance(r._target, docx.parts.image.ImagePart): rid2img[r.rId] = os.path.basename(r._target.partname) if rid2img: - save_path = "./imgs/" - os.makedirs(save_path, exist_ok=True) + save_path = tempfile.mkdtemp() docx2txt.process(docx_path, save_path) for paragraph in doc.paragraphs: if hasattr(paragraph, "text"):