From 1fee69499137522aca6946275b298a2bf1c01baf Mon Sep 17 00:00:00 2001
From: Nikhil Kulkarni
Date: Fri, 29 Apr 2022 16:58:14 -0700
Subject: [PATCH] Provide more information in job queue error and docs (#995)

* Provide more information in job queue error and docs

* format Java

* Trigger build
---
 docs/configuration.md                                              | 2 +-
 .../com/amazonaws/ml/mms/http/InferenceRequestHandler.java         | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index 783d753ed..77bf13265 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -173,7 +173,7 @@ Most of those properties are designed for performance tuning. Adjusting those nu
 * number_of_netty_threads: number frontend netty thread, default: number of logical processors available to the JVM.
 * netty_client_threads: number of backend netty thread, default: number of logical processors available to the JVM.
 * default_workers_per_model: number of workers to create for each model that loaded at startup time, default: available GPUs in system or number of logical processors available to the JVM.
-* job_queue_size: number inference jobs that frontend will queue before backend can serve, default 100.
+* job_queue_size: number inference jobs that frontend will queue before backend can serve, default 100. Useful in cases where certain requests take predictably longer to complete.
 * async_logging: enable asynchronous logging for higher throughput, log output may be delayed if this is enabled, default: false.
 * default_response_timeout: Timeout, in seconds, used for model's backend workers before they are deemed unresponsive and rebooted. default: 120 seconds.
 * unregister_model_timeout: Timeout, in seconds, used when handling an unregister model request when cleaning a process before it is deemed unresponsive and an error response is sent. default: 120 seconds.
diff --git a/frontend/server/src/main/java/com/amazonaws/ml/mms/http/InferenceRequestHandler.java b/frontend/server/src/main/java/com/amazonaws/ml/mms/http/InferenceRequestHandler.java
index 8189a0425..eeb0269a8 100644
--- a/frontend/server/src/main/java/com/amazonaws/ml/mms/http/InferenceRequestHandler.java
+++ b/frontend/server/src/main/java/com/amazonaws/ml/mms/http/InferenceRequestHandler.java
@@ -173,7 +173,9 @@ private void predict(
         Job job = new Job(ctx, modelName, WorkerCommands.PREDICT, input);
         if (!ModelManager.getInstance().addJob(job)) {
             throw new ServiceUnavailableException(
-                    "No worker is available to serve request: " + modelName);
+                    "No worker is available to serve request for model: "
+                            + modelName
+                            + ". Consider increasing job queue size.");
         }
     }
 