Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Dataprep pinecone CI issue #1067

Merged
merged 1 commit into from
Dec 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions comps/dataprep/pinecone/langchain/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ docker build -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_pro
### Run Docker with CLI

```bash
docker run -d --name="dataprep-pinecone-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-pinecone:latest
docker run -d --name="dataprep-pinecone-server" -p 6007:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/dataprep-pinecone:latest
```

### Setup Environment Variables
Expand All @@ -65,5 +65,5 @@ docker compose -f docker-compose-dataprep-pinecone.yaml up -d
Once the document preparation microservice for Pinecone is started, users can use the command below to invoke the microservice, which converts the document to embeddings and saves them to the database.

```bash
curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document"}' http://localhost:6000/v1/dataprep
curl -X POST -H "Content-Type: application/json" -d '{"path":"/path/to/document"}' http://localhost:6007/v1/dataprep
```
2 changes: 0 additions & 2 deletions comps/dataprep/pinecone/langchain/pinecone_langchain.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ services:
container_name: dataprep-pinecone-server
ports:
- "6007:6007"
- "6008:6008"
- "6009:6009"
ipc: host
environment:
no_proxy: ${no_proxy}
Expand Down
14 changes: 6 additions & 8 deletions comps/dataprep/pinecone/langchain/prepare_doc_pinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def ingest_data_to_pinecone(doc_path: DocPath):
table_chunks = get_tables_result(path, doc_path.table_strategy)
chunks = chunks + table_chunks
if logflag:
logger.info("Done preprocessing. Created ", len(chunks), " chunks of the original file.")
logger.info(f"Done preprocessing. Created {len(chunks)} chunks of the original file.")

# Create vectorstore
if tei_embedding_endpoint:
Expand All @@ -135,7 +135,7 @@ def ingest_data_to_pinecone(doc_path: DocPath):
# Creating the index
create_index(pc)
if logflag:
logger.info("Successfully created the index", PINECONE_INDEX_NAME)
logger.info(f"Successfully created the index {PINECONE_INDEX_NAME}")

# Batch size
batch_size = 32
Expand Down Expand Up @@ -174,7 +174,7 @@ async def ingest_link_to_pinecone(link_list: List[str], chunk_size, chunk_overla
# Creating the index
create_index(pc)
if logflag:
logger.info("Successfully created the index", PINECONE_INDEX_NAME)
logger.info(f"Successfully created the index {PINECONE_INDEX_NAME}")

# save link contents and doc_ids one by one
for link in link_list:
Expand Down Expand Up @@ -252,7 +252,7 @@ async def ingest_documents(


@register_microservice(
name="opea_service@prepare_doc_pinecone_file", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6008
name="opea_service@prepare_doc_pinecone", endpoint="/v1/dataprep/get_file", host="0.0.0.0", port=6007
)
async def rag_get_file_structure():
if logflag:
Expand All @@ -270,7 +270,7 @@ async def rag_get_file_structure():


@register_microservice(
name="opea_service@prepare_doc_pinecone_del", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6009
name="opea_service@prepare_doc_pinecone", endpoint="/v1/dataprep/delete_file", host="0.0.0.0", port=6007
)
async def delete_all(file_path: str = Body(..., embed=True)):
"""Delete file according to `file_path`.
Expand All @@ -288,7 +288,7 @@ async def delete_all(file_path: str = Body(..., embed=True)):
logger.info("[dataprep - del] successfully delete all files.")
create_upload_folder(upload_folder)
if logflag:
logger.info({"status": True})
logger.info('{"status": True}')
return {"status": True}
else:
raise HTTPException(status_code=404, detail="Single file deletion is not implemented yet")
Expand All @@ -297,5 +297,3 @@ async def delete_all(file_path: str = Body(..., embed=True)):
if __name__ == "__main__":
create_upload_folder(upload_folder)
opea_microservices["opea_service@prepare_doc_pinecone"].start()
opea_microservices["opea_service@prepare_doc_pinecone_file"].start()
opea_microservices["opea_service@prepare_doc_pinecone_del"].start()
4 changes: 2 additions & 2 deletions tests/dataprep/test_dataprep_pinecone_langchain.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ function start_service() {
export PINECONE_INDEX_NAME="test-index"
export HUGGINGFACEHUB_API_TOKEN=$HF_TOKEN

docker run -d --name="test-comps-dataprep-pinecone" -p 5039:6007 -p 5040:6008 -p 5041:6009 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME opea/dataprep-pinecone:comps
docker run -d --name="test-comps-dataprep-pinecone" -p 5039:6007 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e PINECONE_API_KEY=$PINECONE_API_KEY -e PINECONE_INDEX_NAME=$PINECONE_INDEX_NAME -e LOGFLAG=true opea/dataprep-pinecone:comps

sleep 1m
}
Expand All @@ -41,7 +41,7 @@ function validate_microservice() {
docker logs test-comps-dataprep-pinecone
exit 1
fi
DELETE_URL="http://$ip_address:5041/v1/dataprep/delete_file"
DELETE_URL="http://$ip_address:5039/v1/dataprep/delete_file"
result=$(curl --noproxy $ip_address --location --request POST \
-d '{"file_path": "all"}' -H 'Content-Type: application/json' $DELETE_URL)
if [[ $result == *"true"* ]]; then
Expand Down
Loading