From 52abf1405031b2fa924fdbb7110f0cd13883fe5f Mon Sep 17 00:00:00 2001 From: Dhrubo Saha Date: Wed, 9 Aug 2023 10:47:00 -0700 Subject: [PATCH 1/7] fixing metrics Signed-off-by: Dhrubo Saha --- .../java/org/opensearch/ml/model/MLModelManager.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java b/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java index 07720d45f8..6cda6a97f0 100644 --- a/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java +++ b/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java @@ -318,9 +318,10 @@ private void uploadMLModelMeta(MLRegisterModelMetaInput mlRegisterModelMetaInput * @param mlTask ML task */ public void registerMLModel(MLRegisterModelInput registerModelInput, MLTask mlTask) { - mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT).increment(); + checkAndAddRunningTask(mlTask, maxRegisterTasksPerNode); try { + mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT).increment(); mlStats.getStat(MLNodeLevelStat.ML_NODE_EXECUTING_TASK_COUNT).increment(); mlStats.createCounterStatIfAbsent(mlTask.getFunctionName(), REGISTER, ML_ACTION_REQUEST_COUNT).increment(); @@ -380,7 +381,6 @@ public void registerMLModel(MLRegisterModelInput registerModelInput, MLTask mlTa handleException(registerModelInput.getFunctionName(), mlTask.getTaskId(), e); } } catch (Exception e) { - mlStats.createCounterStatIfAbsent(mlTask.getFunctionName(), REGISTER, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment(); handleException(registerModelInput.getFunctionName(), mlTask.getTaskId(), e); } finally { mlStats.getStat(MLNodeLevelStat.ML_NODE_EXECUTING_TASK_COUNT).increment(); @@ -392,9 +392,9 @@ private void indexRemoteModel(MLRegisterModelInput registerModelInput, MLTask ml FunctionName functionName = mlTask.getFunctionName(); try (ThreadContext.StoredContext context = client.threadPool().getThreadContext().stashContext()) { mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT).increment(); - mlStats.createCounterStatIfAbsent(functionName, REGISTER, ML_ACTION_REQUEST_COUNT).increment(); mlStats.getStat(MLNodeLevelStat.ML_NODE_EXECUTING_TASK_COUNT).increment(); + String modelName = registerModelInput.getModelName(); String version = modelVersion == null ? registerModelInput.getVersion() : modelVersion; Instant now = Instant.now(); @@ -462,7 +462,6 @@ private void registerModelFromUrl(MLRegisterModelInput registerModelInput, MLTas FunctionName functionName = mlTask.getFunctionName(); try (ThreadContext.StoredContext context = client.threadPool().getThreadContext().stashContext()) { mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT).increment(); - mlStats.createCounterStatIfAbsent(functionName, REGISTER, ML_ACTION_REQUEST_COUNT).increment(); mlStats.getStat(MLNodeLevelStat.ML_NODE_EXECUTING_TASK_COUNT).increment(); String modelName = registerModelInput.getModelName(); @@ -690,6 +689,7 @@ private void deleteModel(String modelId) { private void handleException(FunctionName functionName, String taskId, Exception e) { mlStats.createCounterStatIfAbsent(functionName, REGISTER, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment(); + mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_FAILURE_COUNT).increment(); Map updated = ImmutableMap.of(ERROR_FIELD, MLExceptionUtils.getRootCauseMessage(e), STATE_FIELD, FAILED); mlTaskManager.updateMLTask(taskId, updated, TIMEOUT_IN_MILLIS, true); } @@ -713,6 +713,7 @@ public void deployModel( ActionListener listener ) { mlStats.createCounterStatIfAbsent(functionName, ActionName.DEPLOY, ML_ACTION_REQUEST_COUNT).increment(); + mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT).increment(); List workerNodes = mlTask.getWorkerNodes(); if (modelCacheHelper.isModelDeployed(modelId)) { if (workerNodes != null && workerNodes.size() > 0) { @@ -836,6 +837,7 @@ public void deployModel( private void handleDeployModelException(String modelId, FunctionName functionName, ActionListener listener, Exception e) { mlStats.createCounterStatIfAbsent(functionName, ActionName.DEPLOY, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment(); + mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_FAILURE_COUNT).increment(); removeModel(modelId); listener.onFailure(e); } @@ -1045,6 +1047,7 @@ public synchronized Map undeployModel(String[] modelIds) { if (modelCacheHelper.isModelDeployed(modelId)) { modelUndeployStatus.put(modelId, UNDEPLOYED); mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_MODEL_COUNT).decrement(); + mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT).increment(); mlStats .createCounterStatIfAbsent(getModelFunctionName(modelId), ActionName.UNDEPLOY, ML_ACTION_REQUEST_COUNT) .increment(); From fed820e79b3ce154cb489bd6cdd3d2726d18a0af Mon Sep 17 00:00:00 2001 From: Dhrubo Saha Date: Wed, 9 Aug 2023 14:19:08 -0700 Subject: [PATCH 2/7] addressing comments Signed-off-by: Dhrubo Saha --- .../opensearch/ml/model/MLModelManager.java | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java b/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java index 6cda6a97f0..033917b9e5 100644 --- a/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java +++ b/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java @@ -94,6 +94,7 @@ import org.opensearch.ml.common.MLTask; import org.opensearch.ml.common.connector.Connector; import org.opensearch.ml.common.exception.MLException; +import org.opensearch.ml.common.exception.MLLimitExceededException; import org.opensearch.ml.common.exception.MLResourceNotFoundException; import org.opensearch.ml.common.exception.MLValidationException; import org.opensearch.ml.common.model.MLModelState; @@ -688,8 +689,10 @@ private void deleteModel(String modelId) { } private void handleException(FunctionName functionName, String taskId, Exception e) { - mlStats.createCounterStatIfAbsent(functionName, REGISTER, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment(); - mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_FAILURE_COUNT).increment(); + if (!(e instanceof MLLimitExceededException) && !(e instanceof MLResourceNotFoundException)) { + mlStats.createCounterStatIfAbsent(functionName, REGISTER, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment(); + mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_FAILURE_COUNT).increment(); + } Map updated = ImmutableMap.of(ERROR_FIELD, MLExceptionUtils.getRootCauseMessage(e), STATE_FIELD, FAILED); mlTaskManager.updateMLTask(taskId, updated, TIMEOUT_IN_MILLIS, true); } @@ -824,20 +827,23 @@ public void deployModel( } }, e -> { log.error("Failed to retrieve model " + modelId, e); - handleDeployModelException(modelId, functionName, listener, e); + handleDeployModelException(modelId, functionName, listener, e, false); })); }, e -> { log.error("Failed to deploy model " + modelId, e); - handleDeployModelException(modelId, functionName, listener, e); + handleDeployModelException(modelId, functionName, listener, e, false); }))); } catch (Exception e) { - handleDeployModelException(modelId, functionName, listener, e); + handleDeployModelException(modelId, functionName, listener, e, true); } } private void handleDeployModelException(String modelId, FunctionName functionName, ActionListener listener, Exception e) { - mlStats.createCounterStatIfAbsent(functionName, ActionName.DEPLOY, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment(); - mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_FAILURE_COUNT).increment(); + + if (!(e instanceof MLLimitExceededException) && !(e instanceof MLResourceNotFoundException)) { + mlStats.createCounterStatIfAbsent(functionName, ActionName.DEPLOY, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment(); + mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_FAILURE_COUNT).increment(); + } removeModel(modelId); listener.onFailure(e); } @@ -860,7 +866,7 @@ public void getModel(String modelId, ActionListener listener) { } /** - * Get model from model index with includes/exludes filter. + * Get model from model index with includes/excludes filter. * * @param modelId model id * @param includes fields included From c4b3c353b0994cd2b584f13a41def6d744c7450c Mon Sep 17 00:00:00 2001 From: Dhrubo Saha Date: Wed, 9 Aug 2023 14:23:04 -0700 Subject: [PATCH 3/7] addressing comments Signed-off-by: Dhrubo Saha --- .../main/java/org/opensearch/ml/model/MLModelManager.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java b/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java index 033917b9e5..d3321ffd60 100644 --- a/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java +++ b/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java @@ -827,14 +827,14 @@ public void deployModel( } }, e -> { log.error("Failed to retrieve model " + modelId, e); - handleDeployModelException(modelId, functionName, listener, e, false); + handleDeployModelException(modelId, functionName, listener, e); })); }, e -> { log.error("Failed to deploy model " + modelId, e); - handleDeployModelException(modelId, functionName, listener, e, false); + handleDeployModelException(modelId, functionName, listener, e); }))); } catch (Exception e) { - handleDeployModelException(modelId, functionName, listener, e, true); + handleDeployModelException(modelId, functionName, listener, e); } } From b27d655d48208911ffff601ca24e48fff7b102d6 Mon Sep 17 00:00:00 2001 From: Dhrubo Saha Date: Wed, 9 Aug 2023 15:42:53 -0700 Subject: [PATCH 4/7] updating test Signed-off-by: Dhrubo Saha --- .../test/java/org/opensearch/ml/model/MLModelManagerTests.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugin/src/test/java/org/opensearch/ml/model/MLModelManagerTests.java b/plugin/src/test/java/org/opensearch/ml/model/MLModelManagerTests.java index 9f5cb8c441..cc0b66a3fe 100644 --- a/plugin/src/test/java/org/opensearch/ml/model/MLModelManagerTests.java +++ b/plugin/src/test/java/org/opensearch/ml/model/MLModelManagerTests.java @@ -733,7 +733,8 @@ private void testDeployModel_FailedToRetrieveModelChunks(boolean lastChunk) { modelManager.deployModel(modelId, modelContentHashValue, functionName, true, mlTask, listener); verify(modelCacheHelper).removeModel(eq(modelId)); - verify(mlStats).createCounterStatIfAbsent(eq(functionName), eq(ActionName.DEPLOY), eq(MLActionLevelStat.ML_ACTION_FAILURE_COUNT)); + verify(mlStats).createCounterStatIfAbsent(eq(functionName), eq(ActionName.DEPLOY), eq(MLActionLevelStat.ML_ACTION_REQUEST_COUNT)); + verify(mlStats).getStat(eq(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT)); } private void mock_client_index_ModelChunkFailure(Client client, String modelId) { From d236cd6f021ddd7bdfa2b42dd46a18d809633a50 Mon Sep 17 00:00:00 2001 From: Dhrubo Saha Date: Wed, 9 Aug 2023 17:33:35 -0700 Subject: [PATCH 5/7] added IllegalArgumentException in the if statement Signed-off-by: Dhrubo Saha --- .../main/java/org/opensearch/ml/model/MLModelManager.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java b/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java index d3321ffd60..62302a261e 100644 --- a/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java +++ b/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java @@ -689,7 +689,8 @@ private void deleteModel(String modelId) { } private void handleException(FunctionName functionName, String taskId, Exception e) { - if (!(e instanceof MLLimitExceededException) && !(e instanceof MLResourceNotFoundException)) { + if (!(e instanceof MLLimitExceededException) && !(e instanceof MLResourceNotFoundException) && + !(e instanceof IllegalArgumentException)) { mlStats.createCounterStatIfAbsent(functionName, REGISTER, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment(); mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_FAILURE_COUNT).increment(); } @@ -840,7 +841,8 @@ public void deployModel( private void handleDeployModelException(String modelId, FunctionName functionName, ActionListener listener, Exception e) { - if (!(e instanceof MLLimitExceededException) && !(e instanceof MLResourceNotFoundException)) { + if (!(e instanceof MLLimitExceededException) && !(e instanceof MLResourceNotFoundException) && + !(e instanceof IllegalArgumentException)) { mlStats.createCounterStatIfAbsent(functionName, ActionName.DEPLOY, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment(); mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_FAILURE_COUNT).increment(); } From de0a9961f0b9887a51f0b3e11ba079ecec9088f9 Mon Sep 17 00:00:00 2001 From: Dhrubo Saha Date: Wed, 9 Aug 2023 19:16:54 -0700 Subject: [PATCH 6/7] addressing comments Signed-off-by: Dhrubo Saha --- .../java/org/opensearch/ml/model/MLModelManager.java | 10 ++++++---- .../java/org/opensearch/ml/utils/MLExceptionUtils.java | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java b/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java index 62302a261e..c59aa6e8e1 100644 --- a/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java +++ b/plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java @@ -689,8 +689,9 @@ private void deleteModel(String modelId) { } private void handleException(FunctionName functionName, String taskId, Exception e) { - if (!(e instanceof MLLimitExceededException) && !(e instanceof MLResourceNotFoundException) && - !(e instanceof IllegalArgumentException)) { + if (!(e instanceof MLLimitExceededException) + && !(e instanceof MLResourceNotFoundException) + && !(e instanceof IllegalArgumentException)) { mlStats.createCounterStatIfAbsent(functionName, REGISTER, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment(); mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_FAILURE_COUNT).increment(); } @@ -841,8 +842,9 @@ public void deployModel( private void handleDeployModelException(String modelId, FunctionName functionName, ActionListener listener, Exception e) { - if (!(e instanceof MLLimitExceededException) && !(e instanceof MLResourceNotFoundException) && - !(e instanceof IllegalArgumentException)) { + if (!(e instanceof MLLimitExceededException) + && !(e instanceof MLResourceNotFoundException) + && !(e instanceof IllegalArgumentException)) { mlStats.createCounterStatIfAbsent(functionName, ActionName.DEPLOY, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment(); mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_FAILURE_COUNT).increment(); } diff --git a/plugin/src/main/java/org/opensearch/ml/utils/MLExceptionUtils.java b/plugin/src/main/java/org/opensearch/ml/utils/MLExceptionUtils.java index de2c713d5e..3799b0955a 100644 --- a/plugin/src/main/java/org/opensearch/ml/utils/MLExceptionUtils.java +++ b/plugin/src/main/java/org/opensearch/ml/utils/MLExceptionUtils.java @@ -44,9 +44,9 @@ public static String toJsonString(Map nodeErrors) throws IOExcep public static void logException(String errorMessage, Exception e, Logger log) { Throwable rootCause = ExceptionUtils.getRootCause(e); - if (e instanceof MLLimitExceededException || e instanceof MLResourceNotFoundException) { + if (e instanceof MLLimitExceededException || e instanceof MLResourceNotFoundException || e instanceof IllegalArgumentException) { log.warn(e.getMessage()); - } else if (rootCause instanceof MLLimitExceededException || rootCause instanceof MLResourceNotFoundException) { + } else if (rootCause instanceof MLLimitExceededException || rootCause instanceof MLResourceNotFoundException || rootCause instanceof IllegalArgumentException) { log.warn(rootCause.getMessage()); } else { log.error(errorMessage, e); From 6e08ffa1880ff2a326b895c8a79b74a7d7ee89d2 Mon Sep 17 00:00:00 2001 From: Dhrubo Saha Date: Wed, 9 Aug 2023 19:30:24 -0700 Subject: [PATCH 7/7] fixing spotless Signed-off-by: Dhrubo Saha --- .../main/java/org/opensearch/ml/utils/MLExceptionUtils.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/plugin/src/main/java/org/opensearch/ml/utils/MLExceptionUtils.java b/plugin/src/main/java/org/opensearch/ml/utils/MLExceptionUtils.java index 3799b0955a..e831388b86 100644 --- a/plugin/src/main/java/org/opensearch/ml/utils/MLExceptionUtils.java +++ b/plugin/src/main/java/org/opensearch/ml/utils/MLExceptionUtils.java @@ -46,7 +46,9 @@ public static void logException(String errorMessage, Exception e, Logger log) { Throwable rootCause = ExceptionUtils.getRootCause(e); if (e instanceof MLLimitExceededException || e instanceof MLResourceNotFoundException || e instanceof IllegalArgumentException) { log.warn(e.getMessage()); - } else if (rootCause instanceof MLLimitExceededException || rootCause instanceof MLResourceNotFoundException || rootCause instanceof IllegalArgumentException) { + } else if (rootCause instanceof MLLimitExceededException + || rootCause instanceof MLResourceNotFoundException + || rootCause instanceof IllegalArgumentException) { log.warn(rootCause.getMessage()); } else { log.error(errorMessage, e);