From 2c5164f882b08fc31fea46c591c7bc66b0bc3425 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Fri, 13 Jan 2023 22:18:33 -0800 Subject: [PATCH] [FIX] Fix exception handling in alpa serve (#855) --- alpa/serve/controller.py | 9 +++++---- alpa/serve/http_util.py | 3 +-- examples/llm_serving/client.py | 2 +- examples/llm_serving/launch_model_worker.py | 4 ---- examples/llm_serving/launch_website.py | 4 ++-- examples/llm_serving/service/static/index.html | 4 ++-- examples/llm_serving/test_completions.py | 2 +- examples/llm_serving/test_logprobs.py | 2 +- 8 files changed, 13 insertions(+), 17 deletions(-) diff --git a/alpa/serve/controller.py b/alpa/serve/controller.py index 2038f1af1..741903558 100644 --- a/alpa/serve/controller.py +++ b/alpa/serve/controller.py @@ -193,10 +193,11 @@ async def handle_asgi(self, scope, receive, send): response = await manager.handle_request.remote( name, request_wrapper) - if isinstance(response, Exception): - raise response - - status_code = 200 + if isinstance(response, RelayException): + response = make_error_response(response) + status_code = 400 + else: + status_code = 200 except Exception as e: # pylint: disable=broad-except response = make_error_response(e) status_code = 400 diff --git a/alpa/serve/http_util.py b/alpa/serve/http_util.py index d8c226472..5026dfa2b 100644 --- a/alpa/serve/http_util.py +++ b/alpa/serve/http_util.py @@ -371,8 +371,7 @@ def __init__(self, e): def make_error_response(e): if isinstance(e, RelayException): msg = str(e.e) - stacktrace = "".join(traceback.format_tb( - e.__traceback__)) + e.stacktrace + stacktrace = e.stacktrace else: msg = str(e) stacktrace = "".join(traceback.format_tb(e.__traceback__)) diff --git a/examples/llm_serving/client.py b/examples/llm_serving/client.py index 92bb440c4..ce2c70d1e 100644 --- a/examples/llm_serving/client.py +++ b/examples/llm_serving/client.py @@ -3,7 +3,7 @@ import requests -DEFAULT_URL = "https://opt.alpa.ai" +DEFAULT_URL = "https://api.alpa.ai" headers = {"User-Agent": "Alpa Client"} diff --git a/examples/llm_serving/launch_model_worker.py b/examples/llm_serving/launch_model_worker.py index 537005083..2e9388f60 100644 --- a/examples/llm_serving/launch_model_worker.py +++ b/examples/llm_serving/launch_model_worker.py @@ -190,10 +190,6 @@ def normalize_prompts(self, prompts): async def completions(self, args, request): logger = self.logger - if "redirect_logprobs" in args: - # A redirection to workaround some security settings. - return await self.logprobs(args, request) - # Normalize prompts prompts = args["prompt"] prompts = self.normalize_prompts(prompts) diff --git a/examples/llm_serving/launch_website.py b/examples/llm_serving/launch_website.py index b63aa7907..1728a9c8f 100644 --- a/examples/llm_serving/launch_website.py +++ b/examples/llm_serving/launch_website.py @@ -40,7 +40,7 @@ def log_scope(request): import pickle import time -from alpa.serve.http_util import HTTPRequestWrapper, make_error_response +from alpa.serve.http_util import HTTPRequestWrapper, make_error_response, RelayException import ray from starlette.responses import JSONResponse ray.init(address="auto", namespace="alpa_serve") @@ -69,7 +69,7 @@ async def redirect(request): ret = await manager.handle_request.remote("default", request) except ray.exceptions.RayActorError: manager = None - if isinstance(ret, Exception): + if isinstance(ret, RelayException): ret = make_error_response(ret) ret = JSONResponse(ret, status_code=400) return ret diff --git a/examples/llm_serving/service/static/index.html b/examples/llm_serving/service/static/index.html index ee9e8e28b..c5f15262d 100644 --- a/examples/llm_serving/service/static/index.html +++ b/examples/llm_serving/service/static/index.html @@ -181,14 +181,14 @@ msg.includes("is not registered") || msg.includes("object has no attribute")) { msg += "\nThe server is probably under regular maintenance. " + - "Please come back 10 minutes later."; + "Please come back later."; } $("#error").text(msg); } else { $("#error").text( "Cannot connect to the server due to unknown errors. " + "\nThe server is probably under regular maintenance. " + - "Please come back 10 minutes later."); + "Please come back later."); } } }); diff --git a/examples/llm_serving/test_completions.py b/examples/llm_serving/test_completions.py index 1b6570f2f..78326e392 100644 --- a/examples/llm_serving/test_completions.py +++ b/examples/llm_serving/test_completions.py @@ -2,7 +2,7 @@ Usage: python3 test_completions.py --url http://localhost:20001 -python3 test_completions.py --url https://opt.alpa.ai --api-key YOUR_KEY +python3 test_completions.py --url https://api.alpa.ai --api-key YOUR_KEY """ import argparse diff --git a/examples/llm_serving/test_logprobs.py b/examples/llm_serving/test_logprobs.py index 7b798578a..6c00ff61d 100644 --- a/examples/llm_serving/test_logprobs.py +++ b/examples/llm_serving/test_logprobs.py @@ -2,7 +2,7 @@ Usage: python3 test_logprobs.py --url http://localhost:20001 -python3 test_logprobs.py --url https://opt.alpa.ai --api-key YOUR_KEY +python3 test_logprobs.py --url https://api.alpa.ai --api-key YOUR_KEY """ import argparse import time