From 3367b762762180381c607c7198c0bf4761b682e6 Mon Sep 17 00:00:00 2001 From: WenjiaoYue Date: Wed, 4 Sep 2024 18:48:48 +0800 Subject: [PATCH] update upload_training_files format (#613) * update upload_training_files format Signed-off-by: Yue, Wenjiao --- comps/cores/proto/api_protocol.py | 36 +++++++++++++++++++++++++ comps/finetuning/README.md | 2 +- comps/finetuning/finetuning_service.py | 30 ++++++--------------- comps/finetuning/handlers.py | 37 ++++++++++++++++++++++++-- tests/test_finetuning.sh | 15 ++++++++--- 5 files changed, 92 insertions(+), 28 deletions(-) diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py index 0a8b2de00..2b2481067 100644 --- a/comps/cores/proto/api_protocol.py +++ b/comps/cores/proto/api_protocol.py @@ -761,3 +761,39 @@ class FineTuningJobList(BaseModel): If true, additional requests can be made to retrieve more jobs. """ + + +class UploadFileRequest(BaseModel): + purpose: str + """The intended purpose of the uploaded file. + + Use "assistants" for Assistants and Message files, "vision" for Assistants image file inputs, "batch" for Batch API, and "fine-tune" for Fine-tuning. + """ + + file: UploadFile + """The File object (not file name) to be uploaded.""" + + +class FileObject(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/files/object + id: str + """The file identifier, which can be referenced in the API endpoints.""" + + bytes: int + """The size of the file, in bytes.""" + + created_at: int + """The Unix timestamp (in seconds) for when the file was created.""" + + filename: str + """The name of the file.""" + + object: str = "file" + """The object type, which is always file.""" + + purpose: str + """The intended purpose of the file. + + Supported values are assistants, assistants_output, batch, batch_output, fine-tune, fine-tune-results and vision. + """ diff --git a/comps/finetuning/README.md b/comps/finetuning/README.md index 33da63c29..4232c82be 100644 --- a/comps/finetuning/README.md +++ b/comps/finetuning/README.md @@ -93,7 +93,7 @@ Assuming a training file `alpaca_data.json` is uploaded, it can be downloaded in ```bash # upload a training file -curl http://${your_ip}:8015/v1/finetune/upload_training_files -X POST -H "Content-Type: multipart/form-data" -F "files=@./alpaca_data.json" +curl http://${your_ip}:8015/v1/files -X POST -H "Content-Type: multipart/form-data" -F "file=@./alpaca_data.json" -F purpose="fine-tune" # create a finetuning job curl http://${your_ip}:8015/v1/fine_tuning/jobs \ diff --git a/comps/finetuning/finetuning_service.py b/comps/finetuning/finetuning_service.py index bd1423d21..958b08acb 100644 --- a/comps/finetuning/finetuning_service.py +++ b/comps/finetuning/finetuning_service.py @@ -1,23 +1,18 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 - -import os -import urllib.parse -from typing import List, Optional, Union - -from fastapi import BackgroundTasks, File, UploadFile +from fastapi import BackgroundTasks, Depends from comps import opea_microservices, register_microservice -from comps.cores.proto.api_protocol import FineTuningJobIDRequest +from comps.cores.proto.api_protocol import FineTuningJobIDRequest, UploadFileRequest from comps.finetuning.finetune_config import FineTuningParams from comps.finetuning.handlers import ( - DATASET_BASE_PATH, handle_cancel_finetuning_job, handle_create_finetuning_jobs, handle_list_finetuning_checkpoints, handle_list_finetuning_jobs, handle_retrieve_finetuning_job, - save_content_to_local_disk, + handle_upload_training_files, + upload_file, ) @@ -51,22 +46,13 @@ def cancel_finetuning_job(request: FineTuningJobIDRequest): @register_microservice( name="opea_service@finetuning", - endpoint="/v1/finetune/upload_training_files", + endpoint="/v1/files", host="0.0.0.0", port=8015, ) -async def upload_training_files( - files: Optional[Union[UploadFile, List[UploadFile]]] = File(None), -): - if files: - if not isinstance(files, list): - files = [files] - for file in files: - filename = urllib.parse.quote(file.filename, safe="") - save_path = os.path.join(DATASET_BASE_PATH, filename) - await save_content_to_local_disk(save_path, file) - - return {"status": 200, "message": "Training files uploaded."} +async def upload_training_files(request: UploadFileRequest = Depends(upload_file)): + uploadFileInfo = await handle_upload_training_files(request) + return uploadFileInfo @register_microservice( diff --git a/comps/finetuning/handlers.py b/comps/finetuning/handlers.py index 406bcd245..f501f511b 100644 --- a/comps/finetuning/handlers.py +++ b/comps/finetuning/handlers.py @@ -4,16 +4,24 @@ import os import random import time +import urllib.parse import uuid from pathlib import Path from typing import Dict -from fastapi import BackgroundTasks, HTTPException +from fastapi import BackgroundTasks, File, Form, HTTPException, UploadFile from pydantic_yaml import parse_yaml_raw_as, to_yaml_file from ray.job_submission import JobSubmissionClient from comps import CustomLogger -from comps.cores.proto.api_protocol import FineTuningJob, FineTuningJobIDRequest, FineTuningJobList +from comps.cores.proto.api_protocol import ( + FileObject, + FineTuningJob, + FineTuningJobIDRequest, + FineTuningJobList, + FineTuningJobsRequest, + UploadFileRequest, +) from comps.finetuning.finetune_config import FinetuneConfig, FineTuningParams logger = CustomLogger("finetuning_handlers") @@ -185,3 +193,28 @@ def handle_list_finetuning_checkpoints(request: FineTuningJobIDRequest): if os.path.exists(output_dir): checkpoints = os.listdir(output_dir) return checkpoints + + +async def upload_file(purpose: str = Form(...), file: UploadFile = File(...)): + return UploadFileRequest(purpose=purpose, file=file) + + +async def handle_upload_training_files(request: UploadFileRequest): + file = request.file + if file is None: + raise HTTPException(status_code=404, detail="upload file failed!") + filename = urllib.parse.quote(file.filename, safe="") + save_path = os.path.join(DATASET_BASE_PATH, filename) + await save_content_to_local_disk(save_path, file) + + fileBytes = os.path.getsize(save_path) + fileInfo = FileObject( + id=f"file-{uuid.uuid4()}", + object="file", + bytes=fileBytes, + created_at=int(time.time()), + filename=filename, + purpose="fine-tune", + ) + + return fileInfo diff --git a/tests/test_finetuning.sh b/tests/test_finetuning.sh index 3ddbdfcf0..38e1aa405 100644 --- a/tests/test_finetuning.sh +++ b/tests/test_finetuning.sh @@ -33,13 +33,21 @@ function validate_microservice() { export no_proxy="localhost,127.0.0.1,"${ip_address} # test /v1/dataprep upload file - URL="http://${ip_address}:$finetuning_service_port/v1/finetune/upload_training_files" + URL="http://${ip_address}:$finetuning_service_port/v1/files" echo '[{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."},{"instruction": "Give three tips for staying healthy.", "input": "", "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."}]' > $LOG_PATH/test_data.json - HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./test_data.json' -H 'Content-Type: multipart/form-data' "$URL") + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'file=@./test_data.json' -F purpose="fine-tune" -H 'Content-Type: multipart/form-data' "$URL") HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') SERVICE_NAME="finetuning-server - upload - file" + # Parse the JSON response + purpose=$(echo "$RESPONSE_BODY" | jq -r '.purpose') + filename=$(echo "$RESPONSE_BODY" | jq -r '.filename') + + # Define expected values + expected_purpose="fine-tune" + expected_filename="test_data.json" + if [ "$HTTP_STATUS" -ne "200" ]; then echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log @@ -47,7 +55,8 @@ function validate_microservice() { else echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." fi - if [[ "$RESPONSE_BODY" != *"Training files uploaded"* ]]; then + # Check if the parsed values match the expected values + if [[ "$purpose" != "$expected_purpose" || "$filename" != "$expected_filename" ]]; then echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" docker logs finetuning-server >> ${LOG_PATH}/finetuning-server_upload_file.log exit 1