From 78a304a95d546db31139a135a1f031890a55ae72 Mon Sep 17 00:00:00 2001 From: Pavan Yekbote Date: Tue, 26 Nov 2024 16:59:06 -0800 Subject: [PATCH] [Enhancement] Fetch system index mappings from json file instead of string constants (#3153) * feat(index mappings): fetch mappings and version from json file instead of string constants Signed-off-by: Pavan Yekbote * refactor: changing exception being thrown Signed-off-by: Pavan Yekbote * chore: remove unused file Signed-off-by: Pavan Yekbote * chore: fix typo in comment Signed-off-by: Pavan Yekbote * chore: adding new line at the end of files Signed-off-by: Pavan Yekbote * feat: add test cases Signed-off-by: Pavan Yekbote * fix: remove test code Signed-off-by: Pavan Yekbote * fix(test): in main the versions were not updated appropriately Signed-off-by: Pavan Yekbote * refactor: move mapping templates under common module Signed-off-by: Pavan Yekbote * refactor: ensure that conversationindexconstants reference mlindex enums rather than use their own mappings Signed-off-by: Pavan Yekbote * refactor: update comment Signed-off-by: Pavan Yekbote * refactor: rename dir from mappings to index-mappings Signed-off-by: Pavan Yekbote * fix: add null checks Signed-off-by: Pavan Yekbote * fix: adding dependencies for testing Signed-off-by: Pavan Yekbote * fix(test): compare json object rather than strings to avoid eol character issue Signed-off-by: Pavan Yekbote * refactor: combine if statements into single check Signed-off-by: Pavan Yekbote * refactoring: null handling + clean code Signed-off-by: Pavan Yekbote * spotless apply Signed-off-by: Pavan Yekbote --------- Signed-off-by: Pavan Yekbote --- common/build.gradle | 1 + .../org/opensearch/ml/common/CommonValue.java | 539 +----------------- .../org/opensearch/ml/common}/MLIndex.java | 71 ++- .../ConversationalIndexConstants.java | 75 +-- .../ml/common/utils/IndexUtils.java | 43 ++ .../ml/common/utils/StringUtils.java | 23 +- .../resources/index-mappings/ml-agent.json | 45 ++ .../resources/index-mappings/ml-config.json | 24 + .../index-mappings/ml-connector.json | 95 +++ .../index-mappings/ml-controller.json | 10 + .../index-mappings/ml-memory-message.json | 35 ++ .../index-mappings/ml-memory-meta.json | 27 + .../index-mappings/ml-model-group.json | 83 +++ .../resources/index-mappings/ml-model.json | 243 ++++++++ .../resources/index-mappings/ml-task.json | 86 +++ .../ml/common/utils/IndexUtilsTest.java | 110 ++++ .../test-mapping-malformed.json | 13 + .../index-mappings/test-mapping.json | 16 + memory/build.gradle | 1 + .../MetricsCorrelation.java | 4 +- .../ml/engine/indices/MLIndicesHandler.java | 1 + .../engine/indices/MLIndicesHandlerTest.java | 2 +- 22 files changed, 912 insertions(+), 635 deletions(-) rename {ml-algorithms/src/main/java/org/opensearch/ml/engine/indices => common/src/main/java/org/opensearch/ml/common}/MLIndex.java (56%) create mode 100644 common/src/main/resources/index-mappings/ml-agent.json create mode 100644 common/src/main/resources/index-mappings/ml-config.json create mode 100644 common/src/main/resources/index-mappings/ml-connector.json create mode 100644 common/src/main/resources/index-mappings/ml-controller.json create mode 100644 common/src/main/resources/index-mappings/ml-memory-message.json create mode 100644 common/src/main/resources/index-mappings/ml-memory-meta.json create mode 100644 common/src/main/resources/index-mappings/ml-model-group.json create mode 100644 common/src/main/resources/index-mappings/ml-model.json create mode 100644 common/src/main/resources/index-mappings/ml-task.json create mode 100644 common/src/test/resources/index-mappings/test-mapping-malformed.json create mode 100644 common/src/test/resources/index-mappings/test-mapping.json diff --git a/common/build.gradle b/common/build.gradle index 60edb3101a..979752bc05 100644 --- a/common/build.gradle +++ b/common/build.gradle @@ -26,6 +26,7 @@ dependencies { compileOnly group: 'org.apache.commons', name: 'commons-text', version: '1.10.0' compileOnly group: 'com.google.code.gson', name: 'gson', version: '2.10.1' compileOnly group: 'org.json', name: 'json', version: '20231013' + testImplementation group: 'org.json', name: 'json', version: '20231013' implementation('com.google.guava:guava:32.1.2-jre') { exclude group: 'com.google.guava', module: 'failureaccess' exclude group: 'com.google.code.findbugs', module: 'jsr305' diff --git a/common/src/main/java/org/opensearch/ml/common/CommonValue.java b/common/src/main/java/org/opensearch/ml/common/CommonValue.java index 3adaa8ca2e..e06e552536 100644 --- a/common/src/main/java/org/opensearch/ml/common/CommonValue.java +++ b/common/src/main/java/org/opensearch/ml/common/CommonValue.java @@ -5,39 +5,9 @@ package org.opensearch.ml.common; -import static org.opensearch.ml.common.MLConfig.CONFIG_TYPE_FIELD; -import static org.opensearch.ml.common.MLConfig.LAST_UPDATED_TIME_FIELD; -import static org.opensearch.ml.common.MLConfig.ML_CONFIGURATION_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.APPLICATION_TYPE_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_ADDITIONAL_INFO_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_CONVERSATION_ID_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_CREATE_TIME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_INDEX_SCHEMA_VERSION; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_INPUT_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_ORIGIN_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_PROMPT_TEMPLATE_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_RESPONSE_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.INTERACTIONS_TRACE_NUMBER_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_CREATED_TIME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_INDEX_SCHEMA_VERSION; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_NAME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.META_UPDATED_TIME_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.PARENT_INTERACTIONS_ID_FIELD; -import static org.opensearch.ml.common.conversation.ConversationalIndexConstants.USER_FIELD; -import static org.opensearch.ml.common.model.MLModelConfig.ALL_CONFIG_FIELD; -import static org.opensearch.ml.common.model.MLModelConfig.MODEL_TYPE_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.EMBEDDING_DIMENSION_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.FRAMEWORK_TYPE_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.MODEL_MAX_LENGTH_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.NORMALIZE_RESULT_FIELD; -import static org.opensearch.ml.common.model.TextEmbeddingModelConfig.POOLING_MODE_FIELD; - import java.util.Set; import org.opensearch.Version; -import org.opensearch.ml.common.agent.MLAgent; -import org.opensearch.ml.common.connector.AbstractConnector; -import org.opensearch.ml.common.controller.MLController; import com.google.common.collect.ImmutableSet; @@ -63,516 +33,27 @@ public class CommonValue { public static final String ML_MODEL_GROUP_INDEX = ".plugins-ml-model-group"; public static final String ML_MODEL_INDEX = ".plugins-ml-model"; public static final String ML_TASK_INDEX = ".plugins-ml-task"; - public static final Integer ML_MODEL_GROUP_INDEX_SCHEMA_VERSION = 2; - public static final Integer ML_MODEL_INDEX_SCHEMA_VERSION = 11; public static final String ML_CONNECTOR_INDEX = ".plugins-ml-connector"; - public static final Integer ML_TASK_INDEX_SCHEMA_VERSION = 3; - public static final Integer ML_CONNECTOR_SCHEMA_VERSION = 3; public static final String ML_CONFIG_INDEX = ".plugins-ml-config"; - public static final Integer ML_CONFIG_INDEX_SCHEMA_VERSION = 4; public static final String ML_CONTROLLER_INDEX = ".plugins-ml-controller"; - public static final Integer ML_CONTROLLER_INDEX_SCHEMA_VERSION = 1; public static final String ML_MAP_RESPONSE_KEY = "response"; public static final String ML_AGENT_INDEX = ".plugins-ml-agent"; - public static final Integer ML_AGENT_INDEX_SCHEMA_VERSION = 2; public static final String ML_MEMORY_META_INDEX = ".plugins-ml-memory-meta"; - public static final Integer ML_MEMORY_META_INDEX_SCHEMA_VERSION = 1; public static final String ML_MEMORY_MESSAGE_INDEX = ".plugins-ml-memory-message"; public static final String ML_STOP_WORDS_INDEX = ".plugins-ml-stop-words"; public static final Set stopWordsIndices = ImmutableSet.of(".plugins-ml-stop-words"); - public static final Integer ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION = 1; - public static final String USER_FIELD_MAPPING = " \"" - + CommonValue.USER - + "\": {\n" - + " \"type\": \"nested\",\n" - + " \"properties\": {\n" - + " \"name\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\", \"ignore_above\":256}}},\n" - + " \"backend_roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"custom_attribute_names\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}}\n" - + " }\n" - + " }\n"; - public static final String ML_MODEL_GROUP_INDEX_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + ML_MODEL_GROUP_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + MLModelGroup.MODEL_GROUP_NAME_FIELD - + "\": {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\": {\n" - + " \"type\": \"keyword\",\n" - + " \"ignore_above\": 256\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.DESCRIPTION_FIELD - + "\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"" - + MLModelGroup.LATEST_VERSION_FIELD - + "\": {\n" - + " \"type\": \"integer\"\n" - + " },\n" - + " \"" - + MLModelGroup.MODEL_GROUP_ID_FIELD - + "\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" - + " \"" - + MLModelGroup.BACKEND_ROLES_FIELD - + "\": {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\": {\n" - + " \"type\": \"keyword\",\n" - + " \"ignore_above\": 256\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.ACCESS - + "\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" - + " \"" - + MLModelGroup.OWNER - + "\": {\n" - + " \"type\": \"nested\",\n" - + " \"properties\": {\n" - + " \"name\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\", \"ignore_above\":256}}},\n" - + " \"backend_roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"custom_attribute_names\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}}\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.CREATED_TIME_FIELD - + "\": {\n" - + " \"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModelGroup.LAST_UPDATED_TIME_FIELD - + "\": {\n" - + " \"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - - public static final String ML_CONNECTOR_INDEX_FIELDS = " \"properties\": {\n" - + " \"" - + AbstractConnector.NAME_FIELD - + "\" : {\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}},\n" - + " \"" - + AbstractConnector.VERSION_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + AbstractConnector.DESCRIPTION_FIELD - + "\" : {\"type\": \"text\"},\n" - + " \"" - + AbstractConnector.PROTOCOL_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + AbstractConnector.PARAMETERS_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + AbstractConnector.CREDENTIAL_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + AbstractConnector.CLIENT_CONFIG_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + AbstractConnector.ACTIONS_FIELD - + "\" : {\"type\": \"flat_object\"}\n"; - - public static final String ML_MODEL_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_MODEL_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLModel.ALGORITHM_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_NAME_FIELD - + "\" : {\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}},\n" - + " \"" - + MLModel.OLD_MODEL_VERSION_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.MODEL_VERSION_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_GROUP_ID_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_CONTENT_FIELD - + "\" : {\"type\": \"binary\"},\n" - + " \"" - + MLModel.CHUNK_NUMBER_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.TOTAL_CHUNKS_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.MODEL_ID_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.DESCRIPTION_FIELD - + "\" : {\"type\": \"text\"},\n" - + " \"" - + MLModel.MODEL_FORMAT_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_STATE_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.MODEL_CONTENT_SIZE_IN_BYTES_FIELD - + "\" : {\"type\": \"long\"},\n" - + " \"" - + MLModel.PLANNING_WORKER_NODE_COUNT_FIELD - + "\" : {\"type\": \"integer\"},\n" - + " \"" - + MLModel.CURRENT_WORKER_NODE_COUNT_FIELD - + "\" : {\"type\": \"integer\"},\n" - + " \"" - + MLModel.PLANNING_WORKER_NODES_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.DEPLOY_TO_ALL_NODES_FIELD - + "\": {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.IS_HIDDEN_FIELD - + "\": {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.MODEL_CONFIG_FIELD - + "\" : {\"properties\":{\"" - + MODEL_TYPE_FIELD - + "\":{\"type\":\"keyword\"},\"" - + EMBEDDING_DIMENSION_FIELD - + "\":{\"type\":\"integer\"},\"" - + FRAMEWORK_TYPE_FIELD - + "\":{\"type\":\"keyword\"},\"" - + POOLING_MODE_FIELD - + "\":{\"type\":\"keyword\"},\"" - + NORMALIZE_RESULT_FIELD - + "\":{\"type\":\"boolean\"},\"" - + MODEL_MAX_LENGTH_FIELD - + "\":{\"type\":\"integer\"},\"" - + ALL_CONFIG_FIELD - + "\":{\"type\":\"text\"}}},\n" - + " \"" - + MLModel.DEPLOY_SETTING_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLModel.IS_ENABLED_FIELD - + "\" : {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.IS_CONTROLLER_ENABLED_FIELD - + "\" : {\"type\": \"boolean\"},\n" - + " \"" - + MLModel.RATE_LIMITER_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLModel.MODEL_CONTENT_HASH_VALUE_FIELD - + "\" : {\"type\": \"keyword\"},\n" - + " \"" - + MLModel.AUTO_REDEPLOY_RETRY_TIMES_FIELD - + "\" : {\"type\": \"integer\"},\n" - + " \"" - + MLModel.CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_REGISTERED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_DEPLOYED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.LAST_UNDEPLOYED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLModel.INTERFACE_FIELD - + "\": {\"type\": \"flat_object\"},\n" - + " \"" - + MLModel.GUARDRAILS_FIELD - + "\" : {\n" - + " \"properties\": {\n" - + " \"input_guardrail\": {\n" - + " \"properties\": {\n" - + " \"regex\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"stop_words\": {\n" - + " \"properties\": {\n" - + " \"index_name\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"source_fields\": {\n" - + " \"type\": \"text\"\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " },\n" - + " \"output_guardrail\": {\n" - + " \"properties\": {\n" - + " \"regex\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"stop_words\": {\n" - + " \"properties\": {\n" - + " \"index_name\": {\n" - + " \"type\": \"text\"\n" - + " },\n" - + " \"source_fields\": {\n" - + " \"type\": \"text\"\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModel.CONNECTOR_FIELD - + "\": {" - + ML_CONNECTOR_INDEX_FIELDS - + " }\n}," - + USER_FIELD_MAPPING - + " }\n" - + "}"; - - public static final String ML_TASK_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_TASK_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLTask.MODEL_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.TASK_TYPE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.FUNCTION_NAME_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.STATE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.INPUT_TYPE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.PROGRESS_FIELD - + "\": {\"type\": \"float\"},\n" - + " \"" - + MLTask.OUTPUT_INDEX_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.WORKER_NODE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + MLTask.CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLTask.LAST_UPDATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLTask.ERROR_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + MLTask.IS_ASYNC_TASK_FIELD - + "\" : {\"type\" : \"boolean\"}, \n" - + " \"" - + MLTask.REMOTE_JOB_FIELD - + "\" : {\"type\": \"flat_object\"}, \n" - + USER_FIELD_MAPPING - + " }\n" - + "}"; - - public static final String ML_CONNECTOR_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_CONNECTOR_SCHEMA_VERSION - + "},\n" - + ML_CONNECTOR_INDEX_FIELDS - + ",\n" - + " \"" - + MLModelGroup.BACKEND_ROLES_FIELD - + "\": {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\": {\n" - + " \"type\": \"keyword\",\n" - + " \"ignore_above\": 256\n" - + " }\n" - + " }\n" - + " },\n" - + " \"" - + MLModelGroup.ACCESS - + "\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" - + " \"" - + MLModelGroup.OWNER - + "\": {\n" - + " \"type\": \"nested\",\n" - + " \"properties\": {\n" - + " \"name\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\", \"ignore_above\":256}}},\n" - + " \"backend_roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"roles\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}},\n" - + " \"custom_attribute_names\": {\"type\":\"text\", \"fields\":{\"keyword\":{\"type\":\"keyword\"}}}\n" - + " }\n" - + " },\n" - + " \"" - + AbstractConnector.CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + AbstractConnector.LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - - public static final String ML_CONFIG_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_CONFIG_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MASTER_KEY - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + CONFIG_TYPE_FIELD - + "\" : {\"type\":\"keyword\"},\n" - + " \"" - + ML_CONFIGURATION_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - - public static final String ML_CONTROLLER_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_CONTROLLER_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLController.USER_RATE_LIMITER - + "\" : {\"type\": \"flat_object\"}\n" - + " }\n" - + "}"; - - public static final String ML_AGENT_INDEX_MAPPING = "{\n" - + " \"_meta\": {\"schema_version\": " - + ML_AGENT_INDEX_SCHEMA_VERSION - + "},\n" - + " \"properties\": {\n" - + " \"" - + MLAgent.AGENT_NAME_FIELD - + "\" : {\"type\":\"text\",\"fields\":{\"keyword\":{\"type\":\"keyword\",\"ignore_above\":256}}},\n" - + " \"" - + MLAgent.AGENT_TYPE_FIELD - + "\" : {\"type\":\"keyword\"},\n" - + " \"" - + MLAgent.DESCRIPTION_FIELD - + "\" : {\"type\": \"text\"},\n" - + " \"" - + MLAgent.LLM_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.TOOLS_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.PARAMETERS_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.MEMORY_FIELD - + "\" : {\"type\": \"flat_object\"},\n" - + " \"" - + MLAgent.IS_HIDDEN_FIELD - + "\": {\"type\": \"boolean\"},\n" - + " \"" - + MLAgent.CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + MLAgent.LAST_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"}\n" - + " }\n" - + "}"; - public static final String ML_MEMORY_META_INDEX_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + META_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + META_NAME_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + META_CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + META_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + USER_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + APPLICATION_TYPE_FIELD - + "\": {\"type\": \"keyword\"}\n" - + " }\n" - + "}"; + // Index mapping paths + public static final String ML_MODEL_GROUP_INDEX_MAPPING_PATH = "index-mappings/ml-model-group.json"; + public static final String ML_MODEL_INDEX_MAPPING_PATH = "index-mappings/ml-model.json"; + public static final String ML_TASK_INDEX_MAPPING_PATH = "index-mappings/ml-task.json"; + public static final String ML_CONNECTOR_INDEX_MAPPING_PATH = "index-mappings/ml-connector.json"; + public static final String ML_CONFIG_INDEX_MAPPING_PATH = "index-mappings/ml-config.json"; + public static final String ML_CONTROLLER_INDEX_MAPPING_PATH = "index-mappings/ml-controller.json"; + public static final String ML_AGENT_INDEX_MAPPING_PATH = "index-mappings/ml-agent.json"; + public static final String ML_MEMORY_META_INDEX_MAPPING_PATH = "index-mappings/ml-memory-meta.json"; + public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH = "index-mappings/ml-memory-message.json"; - public static final String ML_MEMORY_MESSAGE_INDEX_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + INTERACTIONS_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + INTERACTIONS_CONVERSATION_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + INTERACTIONS_INPUT_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_PROMPT_TEMPLATE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_RESPONSE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_ORIGIN_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_ADDITIONAL_INFO_FIELD - + "\": {\"type\": \"flat_object\"},\n" - + " \"" - + PARENT_INTERACTIONS_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_TRACE_NUMBER_FIELD - + "\": {\"type\": \"long\"}\n" - + " }\n" - + "}"; // Calculate Versions independently of OpenSearch core version public static final Version VERSION_2_11_0 = Version.fromString("2.11.0"); public static final Version VERSION_2_12_0 = Version.fromString("2.12.0"); diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java b/common/src/main/java/org/opensearch/ml/common/MLIndex.java similarity index 56% rename from ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java rename to common/src/main/java/org/opensearch/ml/common/MLIndex.java index 0cc329f1ac..c497452c6b 100644 --- a/ml-algorithms/src/main/java/org/opensearch/ml/engine/indices/MLIndex.java +++ b/common/src/main/java/org/opensearch/ml/common/MLIndex.java @@ -3,46 +3,42 @@ * SPDX-License-Identifier: Apache-2.0 */ -package org.opensearch.ml.engine.indices; +package org.opensearch.ml.common; import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_AGENT_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_CONFIG_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_CONNECTOR_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_CONTROLLER_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_MEMORY_META_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX_MAPPING_PATH; import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_MAPPING; -import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_SCHEMA_VERSION; +import static org.opensearch.ml.common.CommonValue.ML_TASK_INDEX_MAPPING_PATH; + +import java.io.IOException; +import java.io.UncheckedIOException; + +import org.opensearch.ml.common.utils.IndexUtils; public enum MLIndex { - MODEL_GROUP(ML_MODEL_GROUP_INDEX, false, ML_MODEL_GROUP_INDEX_MAPPING, ML_MODEL_GROUP_INDEX_SCHEMA_VERSION), - MODEL(ML_MODEL_INDEX, false, ML_MODEL_INDEX_MAPPING, ML_MODEL_INDEX_SCHEMA_VERSION), - TASK(ML_TASK_INDEX, false, ML_TASK_INDEX_MAPPING, ML_TASK_INDEX_SCHEMA_VERSION), - CONNECTOR(ML_CONNECTOR_INDEX, false, ML_CONNECTOR_INDEX_MAPPING, ML_CONNECTOR_SCHEMA_VERSION), - CONFIG(ML_CONFIG_INDEX, false, ML_CONFIG_INDEX_MAPPING, ML_CONFIG_INDEX_SCHEMA_VERSION), - CONTROLLER(ML_CONTROLLER_INDEX, false, ML_CONTROLLER_INDEX_MAPPING, ML_CONTROLLER_INDEX_SCHEMA_VERSION), - AGENT(ML_AGENT_INDEX, false, ML_AGENT_INDEX_MAPPING, ML_AGENT_INDEX_SCHEMA_VERSION), - MEMORY_META(ML_MEMORY_META_INDEX, false, ML_MEMORY_META_INDEX_MAPPING, ML_MEMORY_META_INDEX_SCHEMA_VERSION), - MEMORY_MESSAGE(ML_MEMORY_MESSAGE_INDEX, false, ML_MEMORY_MESSAGE_INDEX_MAPPING, ML_MEMORY_MESSAGE_INDEX_SCHEMA_VERSION); + MODEL_GROUP(ML_MODEL_GROUP_INDEX, false, ML_MODEL_GROUP_INDEX_MAPPING_PATH), + MODEL(ML_MODEL_INDEX, false, ML_MODEL_INDEX_MAPPING_PATH), + TASK(ML_TASK_INDEX, false, ML_TASK_INDEX_MAPPING_PATH), + CONNECTOR(ML_CONNECTOR_INDEX, false, ML_CONNECTOR_INDEX_MAPPING_PATH), + CONFIG(ML_CONFIG_INDEX, false, ML_CONFIG_INDEX_MAPPING_PATH), + CONTROLLER(ML_CONTROLLER_INDEX, false, ML_CONTROLLER_INDEX_MAPPING_PATH), + AGENT(ML_AGENT_INDEX, false, ML_AGENT_INDEX_MAPPING_PATH), + MEMORY_META(ML_MEMORY_META_INDEX, false, ML_MEMORY_META_INDEX_MAPPING_PATH), + MEMORY_MESSAGE(ML_MEMORY_MESSAGE_INDEX, false, ML_MEMORY_MESSAGE_INDEX_MAPPING_PATH); private final String indexName; // whether we use an alias for the index @@ -50,11 +46,24 @@ public enum MLIndex { private final String mapping; private final Integer version; - MLIndex(String name, boolean alias, String mapping, Integer version) { + MLIndex(String name, boolean alias, String mappingPath) { this.indexName = name; this.alias = alias; - this.mapping = mapping; - this.version = version; + this.mapping = getMapping(mappingPath); + this.version = IndexUtils.getVersionFromMapping(this.mapping); + } + + private String getMapping(String mappingPath) { + if (mappingPath == null) { + throw new IllegalArgumentException("Mapping path cannot be null"); + } + + try { + return IndexUtils.getMappingFromFile(mappingPath); + } catch (IOException e) { + // Unchecked exception is thrown since the method is being called within a constructor + throw new UncheckedIOException("Failed to fetch index mapping from file: " + mappingPath, e); + } } public String getIndexName() { diff --git a/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java b/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java index ac639babb2..88f4920761 100644 --- a/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java +++ b/common/src/main/java/org/opensearch/ml/common/conversation/ConversationalIndexConstants.java @@ -18,15 +18,15 @@ package org.opensearch.ml.common.conversation; import org.opensearch.common.settings.Setting; +import org.opensearch.ml.common.MLIndex; /** * Class containing a bunch of constant defining how the conversational indices are formatted + * ToDo: use MLIndex.MEMORY_MESSAGE and MLIndex.MEMORY_META directly for index names and mappings rather than constants */ public class ConversationalIndexConstants { - /** Version of the meta index schema */ - public final static Integer META_INDEX_SCHEMA_VERSION = 2; /** Name of the conversational metadata index */ - public final static String META_INDEX_NAME = ".plugins-ml-memory-meta"; + public final static String META_INDEX_NAME = MLIndex.MEMORY_META.getIndexName(); /** Name of the metadata field for initial timestamp */ public final static String META_CREATED_TIME_FIELD = "create_time"; /** Name of the metadata field for updated timestamp */ @@ -41,38 +41,10 @@ public class ConversationalIndexConstants { public final static String META_ADDITIONAL_INFO_FIELD = "additional_info"; /** Mappings for the conversational metadata index */ - public final static String META_MAPPING = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + META_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + META_NAME_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + META_CREATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + META_UPDATED_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + USER_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + APPLICATION_TYPE_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + META_ADDITIONAL_INFO_FIELD - + "\": {\"type\": \"flat_object\"}\n" - + " }\n" - + "}"; + public final static String META_MAPPING = MLIndex.MEMORY_META.getMapping(); - /** Version of the interactions index schema */ - public final static Integer INTERACTIONS_INDEX_SCHEMA_VERSION = 1; /** Name of the conversational interactions index */ - public final static String INTERACTIONS_INDEX_NAME = ".plugins-ml-memory-message"; + public final static String INTERACTIONS_INDEX_NAME = MLIndex.MEMORY_MESSAGE.getIndexName(); /** Name of the interaction field for the conversation Id */ public final static String INTERACTIONS_CONVERSATION_ID_FIELD = "memory_id"; /** Name of the interaction field for the human input */ @@ -92,42 +64,7 @@ public class ConversationalIndexConstants { /** The trace number of an interaction */ public final static String INTERACTIONS_TRACE_NUMBER_FIELD = "trace_number"; /** Mappings for the interactions index */ - public final static String INTERACTIONS_MAPPINGS = "{\n" - + " \"_meta\": {\n" - + " \"schema_version\": " - + INTERACTIONS_INDEX_SCHEMA_VERSION - + "\n" - + " },\n" - + " \"properties\": {\n" - + " \"" - + INTERACTIONS_CONVERSATION_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_CREATE_TIME_FIELD - + "\": {\"type\": \"date\", \"format\": \"strict_date_time||epoch_millis\"},\n" - + " \"" - + INTERACTIONS_INPUT_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_PROMPT_TEMPLATE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_RESPONSE_FIELD - + "\": {\"type\": \"text\"},\n" - + " \"" - + INTERACTIONS_ORIGIN_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_ADDITIONAL_INFO_FIELD - + "\": {\"type\": \"flat_object\"},\n" - + " \"" - + PARENT_INTERACTIONS_ID_FIELD - + "\": {\"type\": \"keyword\"},\n" - + " \"" - + INTERACTIONS_TRACE_NUMBER_FIELD - + "\": {\"type\": \"long\"}\n" - + " }\n" - + "}"; + public final static String INTERACTIONS_MAPPINGS = MLIndex.MEMORY_MESSAGE.getMapping(); /** Feature Flag setting for conversational memory */ public static final Setting ML_COMMONS_MEMORY_FEATURE_ENABLED = Setting diff --git a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java index 298bd3ec96..92ccb07bf9 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/IndexUtils.java @@ -5,8 +5,15 @@ package org.opensearch.ml.common.utils; +import java.io.IOException; +import java.net.URL; import java.util.Map; +import com.google.common.base.Charsets; +import com.google.common.io.Resources; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; + import lombok.extern.log4j.Log4j2; @Log4j2 @@ -32,4 +39,40 @@ public class IndexUtils { // Note: This does not include static settings like number of shards, which can't be changed after index creation. public static final Map UPDATED_DEFAULT_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-1"); public static final Map UPDATED_ALL_NODES_REPLICA_INDEX_SETTINGS = Map.of("index.auto_expand_replicas", "0-all"); + + public static String getMappingFromFile(String path) throws IOException { + URL url = IndexUtils.class.getClassLoader().getResource(path); + if (url == null) { + throw new IOException("Resource not found: " + path); + } + + String mapping = Resources.toString(url, Charsets.UTF_8).trim(); + if (mapping.isEmpty() || !StringUtils.isJson(mapping)) { + throw new IllegalArgumentException("Invalid or non-JSON mapping at: " + path); + } + + return mapping; + } + + public static Integer getVersionFromMapping(String mapping) { + if (mapping == null || mapping.isBlank()) { + throw new IllegalArgumentException("Mapping cannot be null or empty"); + } + + JsonObject mappingJson = StringUtils.getJsonObjectFromString(mapping); + if (mappingJson == null || !mappingJson.has("_meta")) { + throw new JsonParseException("Failed to find \"_meta\" object in mapping: " + mapping); + } + + JsonObject metaObject = mappingJson.getAsJsonObject("_meta"); + if (metaObject == null || !metaObject.has("schema_version")) { + throw new JsonParseException("Failed to find \"schema_version\" in \"_meta\" object for mapping: " + mapping); + } + + try { + return metaObject.get("schema_version").getAsInt(); + } catch (NumberFormatException | ClassCastException e) { + throw new JsonParseException("Invalid \"schema_version\" value in mapping: " + mapping, e); + } + } } diff --git a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java index 37bfac6f3f..fcc0c4c3c9 100644 --- a/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java +++ b/common/src/main/java/org/opensearch/ml/common/utils/StringUtils.java @@ -26,6 +26,7 @@ import com.google.gson.Gson; import com.google.gson.JsonElement; +import com.google.gson.JsonObject; import com.google.gson.JsonParser; import com.google.gson.JsonSyntaxException; import com.jayway.jsonpath.JsonPath; @@ -53,12 +54,16 @@ public class StringUtils { } public static final String TO_STRING_FUNCTION_NAME = ".toString()"; - public static boolean isValidJsonString(String Json) { + public static boolean isValidJsonString(String json) { + if (json == null || json.isBlank()) { + return false; + } + try { - new JSONObject(Json); + new JSONObject(json); } catch (JSONException ex) { try { - new JSONArray(Json); + new JSONArray(json); } catch (JSONException ex1) { return false; } @@ -67,6 +72,10 @@ public static boolean isValidJsonString(String Json) { } public static boolean isJson(String json) { + if (json == null || json.isBlank()) { + return false; + } + try { if (!isValidJsonString(json)) { return false; @@ -319,4 +328,12 @@ public static boolean isValidJSONPath(String input) { } } + public static JsonObject getJsonObjectFromString(String jsonString) { + if (jsonString == null || jsonString.isBlank()) { + throw new IllegalArgumentException("Json cannot be null or empty"); + } + + return JsonParser.parseString(jsonString).getAsJsonObject(); + } + } diff --git a/common/src/main/resources/index-mappings/ml-agent.json b/common/src/main/resources/index-mappings/ml-agent.json new file mode 100644 index 0000000000..2bcee6bc3b --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-agent.json @@ -0,0 +1,45 @@ +{ + "_meta": { + "schema_version": 2 + }, + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "type": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "llm": { + "type": "flat_object" + }, + "tools": { + "type": "flat_object" + }, + "parameters": { + "type": "flat_object" + }, + "memory": { + "type": "flat_object" + }, + "is_hidden": { + "type": "boolean" + }, + "created_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-config.json b/common/src/main/resources/index-mappings/ml-config.json new file mode 100644 index 0000000000..6d36d8efb7 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-config.json @@ -0,0 +1,24 @@ +{ + "_meta": { + "schema_version": 4 + }, + "properties": { + "master_key": { + "type": "keyword" + }, + "config_type": { + "type": "keyword" + }, + "ml_configuration": { + "type": "flat_object" + }, + "create_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-connector.json b/common/src/main/resources/index-mappings/ml-connector.json new file mode 100644 index 0000000000..4be168c4b9 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-connector.json @@ -0,0 +1,95 @@ +{ + "_meta": { + "schema_version": 3 + }, + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "version": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "protocol": { + "type": "keyword" + }, + "parameters": { + "type": "flat_object" + }, + "credential": { + "type": "flat_object" + }, + "client_config": { + "type": "flat_object" + }, + "actions": { + "type": "flat_object" + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "access": { + "type": "keyword" + }, + "owner": { + "type": "nested", + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "custom_attribute_names": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } + }, + "created_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-controller.json b/common/src/main/resources/index-mappings/ml-controller.json new file mode 100644 index 0000000000..6822fb19c5 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-controller.json @@ -0,0 +1,10 @@ +{ + "_meta": { + "schema_version": 1 + }, + "properties": { + "user_rate_limiter": { + "type": "flat_object" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-memory-message.json b/common/src/main/resources/index-mappings/ml-memory-message.json new file mode 100644 index 0000000000..10b081aee1 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-memory-message.json @@ -0,0 +1,35 @@ +{ + "_meta": { + "schema_version": 1 + }, + "properties": { + "memory_id": { + "type": "keyword" + }, + "create_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "input": { + "type": "text" + }, + "prompt_template": { + "type": "text" + }, + "response": { + "type": "text" + }, + "origin": { + "type": "keyword" + }, + "additional_info": { + "type": "flat_object" + }, + "parent_message_id": { + "type": "keyword" + }, + "trace_number": { + "type": "long" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-memory-meta.json b/common/src/main/resources/index-mappings/ml-memory-meta.json new file mode 100644 index 0000000000..7684e25d06 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-memory-meta.json @@ -0,0 +1,27 @@ +{ + "_meta": { + "schema_version": 2 + }, + "properties": { + "name": { + "type": "text" + }, + "create_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "user": { + "type": "keyword" + }, + "application_type": { + "type": "keyword" + }, + "additional_info": { + "type": "flat_object" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-model-group.json b/common/src/main/resources/index-mappings/ml-model-group.json new file mode 100644 index 0000000000..7e2437e534 --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-model-group.json @@ -0,0 +1,83 @@ +{ + "_meta": { + "schema_version": 2 + }, + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "description": { + "type": "text" + }, + "latest_version": { + "type": "integer" + }, + "model_group_id": { + "type": "keyword" + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "access": { + "type": "keyword" + }, + "owner": { + "type": "nested", + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "custom_attribute_names": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } + }, + "created_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-model.json b/common/src/main/resources/index-mappings/ml-model.json new file mode 100644 index 0000000000..b996e463cd --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-model.json @@ -0,0 +1,243 @@ +{ + "_meta": { + "schema_version": 11 + }, + "properties": { + "algorithm": { + "type": "keyword" + }, + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "version": { + "type": "long" + }, + "model_version": { + "type": "keyword" + }, + "model_group_id": { + "type": "keyword" + }, + "model_content": { + "type": "binary" + }, + "chunk_number": { + "type": "long" + }, + "total_chunks": { + "type": "long" + }, + "model_id": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "model_format": { + "type": "keyword" + }, + "model_state": { + "type": "keyword" + }, + "model_content_size_in_bytes": { + "type": "long" + }, + "planning_worker_node_count": { + "type": "integer" + }, + "current_worker_node_count": { + "type": "integer" + }, + "planning_worker_nodes": { + "type": "keyword" + }, + "deploy_to_all_nodes": { + "type": "boolean" + }, + "is_hidden": { + "type": "boolean" + }, + "model_config": { + "properties": { + "model_type": { + "type": "keyword" + }, + "embedding_dimension": { + "type": "integer" + }, + "framework_type": { + "type": "keyword" + }, + "pooling_mode": { + "type": "keyword" + }, + "normalize_result": { + "type": "boolean" + }, + "model_max_length": { + "type": "integer" + }, + "all_config": { + "type": "text" + } + } + }, + "deploy_setting": { + "type": "flat_object" + }, + "is_enabled": { + "type": "boolean" + }, + "is_controller_enabled": { + "type": "boolean" + }, + "rate_limiter": { + "type": "flat_object" + }, + "model_content_hash_value": { + "type": "keyword" + }, + "auto_redeploy_retry_times": { + "type": "integer" + }, + "created_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_updated_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_registered_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_deployed_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_undeployed_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "interface": { + "type": "flat_object" + }, + "guardrails": { + "properties": { + "input_guardrail": { + "properties": { + "regex": { + "type": "text" + }, + "stop_words": { + "properties": { + "index_name": { + "type": "text" + }, + "source_fields": { + "type": "text" + } + } + } + } + }, + "output_guardrail": { + "properties": { + "regex": { + "type": "text" + }, + "stop_words": { + "properties": { + "index_name": { + "type": "text" + }, + "source_fields": { + "type": "text" + } + } + } + } + } + } + }, + "connector": { + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "version": { + "type": "keyword" + }, + "description": { + "type": "text" + }, + "protocol": { + "type": "keyword" + }, + "parameters": { + "type": "flat_object" + }, + "credential": { + "type": "flat_object" + }, + "client_config": { + "type": "flat_object" + }, + "actions": { + "type": "flat_object" + } + } + }, + "user": { + "type": "nested", + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "custom_attribute_names": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } + } + } +} diff --git a/common/src/main/resources/index-mappings/ml-task.json b/common/src/main/resources/index-mappings/ml-task.json new file mode 100644 index 0000000000..ad428724bf --- /dev/null +++ b/common/src/main/resources/index-mappings/ml-task.json @@ -0,0 +1,86 @@ +{ + "_meta": { + "schema_version": 3 + }, + "properties": { + "model_id": { + "type": "keyword" + }, + "task_type": { + "type": "keyword" + }, + "function_name": { + "type": "keyword" + }, + "state": { + "type": "keyword" + }, + "input_type": { + "type": "keyword" + }, + "progress": { + "type": "float" + }, + "output_index": { + "type": "keyword" + }, + "worker_node": { + "type": "keyword" + }, + "create_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "last_update_time": { + "type": "date", + "format": "strict_date_time||epoch_millis" + }, + "error": { + "type": "text" + }, + "is_async": { + "type": "boolean" + }, + "remote_job": { + "type": "flat_object" + }, + "user": { + "type": "nested", + "properties": { + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "backend_roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "roles": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "custom_attribute_names": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + } + } + } +} diff --git a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java index 8cfad37c98..a4b3badacf 100644 --- a/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java +++ b/common/src/test/java/org/opensearch/ml/common/utils/IndexUtilsTest.java @@ -6,11 +6,15 @@ package org.opensearch.ml.common.utils; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import java.io.IOException; import java.util.Map; import org.junit.Test; +import com.google.gson.JsonParseException; + public class IndexUtilsTest { @Test @@ -42,4 +46,110 @@ public void testUpdatedAllNodesReplicaIndexSettingsContainsExpectedValues() { assertEquals("index.auto_expand_replicas should be 0-all", updatedIndexSettings.get("index.auto_expand_replicas"), "0-all"); assertEquals("INDEX_SETTINGS should contain exactly 1 settings", 1, updatedIndexSettings.size()); } + + @Test + public void testGetMappingFromFile() { + String expectedMapping = "{\n" + + " \"_meta\": {\n" + + " \"schema_version\": \"1\"\n" + + " },\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + try { + String actualMapping = IndexUtils.getMappingFromFile("index-mappings/test-mapping.json"); + // comparing JsonObjects to avoid issues caused by eol character in different OS + assertEquals(StringUtils.getJsonObjectFromString(expectedMapping), StringUtils.getJsonObjectFromString(actualMapping)); + } catch (IOException e) { + throw new RuntimeException("Failed to read file at path: index-mappings/test-mapping.json"); + } + } + + @Test + public void testGetMappingFromFileFileNotFound() { + String path = "index-mappings/test-mapping-not-found.json"; + IOException e = assertThrows(IOException.class, () -> IndexUtils.getMappingFromFile(path)); + assertEquals("Resource not found: " + path, e.getMessage()); + } + + @Test + public void testGetMappingFromFilesMalformedJson() { + String path = "index-mappings/test-mapping-malformed.json"; + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> IndexUtils.getMappingFromFile(path)); + assertEquals("Invalid or non-JSON mapping at: " + path, e.getMessage()); + } + + @Test + public void testGetVersionFromMapping() { + Integer expectedVersion = 1; + String mapping = "{\n" + + " \"_meta\": {\n" + + " \"schema_version\": \"1\"\n" + + " },\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + + assertEquals(expectedVersion, IndexUtils.getVersionFromMapping(mapping)); + } + + @Test + public void testGetVersionFromMappingNoMeta() { + String mapping = "{\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + + JsonParseException e = assertThrows(JsonParseException.class, () -> IndexUtils.getVersionFromMapping(mapping)); + assertEquals("Failed to find \"_meta\" object in mapping: " + mapping, e.getMessage()); + } + + @Test + public void testGetVersionFromMappingNoSchemaVersion() { + String mapping = "{\n" + + " \"_meta\": {\n" + + " },\n" + + " \"properties\": {\n" + + " \"test_field_1\": {\n" + + " \"type\": \"test_type_1\"\n" + + " },\n" + + " \"test_field_2\": {\n" + + " \"type\": \"test_type_2\"\n" + + " },\n" + + " \"test_field_3\": {\n" + + " \"type\": \"test_type_3\"\n" + + " }\n" + + " }\n" + + "}\n"; + + JsonParseException e = assertThrows(JsonParseException.class, () -> IndexUtils.getVersionFromMapping(mapping)); + assertEquals("Failed to find \"schema_version\" in \"_meta\" object for mapping: " + mapping, e.getMessage()); + } } diff --git a/common/src/test/resources/index-mappings/test-mapping-malformed.json b/common/src/test/resources/index-mappings/test-mapping-malformed.json new file mode 100644 index 0000000000..f87e98da9b --- /dev/null +++ b/common/src/test/resources/index-mappings/test-mapping-malformed.json @@ -0,0 +1,13 @@ +{ + "_meta": { + "schema_version": "1" + }, + "properties": { + "test_field_1": { + "type": "test_type_1" + }, + { + "malformed": } + } + } +} diff --git a/common/src/test/resources/index-mappings/test-mapping.json b/common/src/test/resources/index-mappings/test-mapping.json new file mode 100644 index 0000000000..6114de4687 --- /dev/null +++ b/common/src/test/resources/index-mappings/test-mapping.json @@ -0,0 +1,16 @@ +{ + "_meta": { + "schema_version": "1" + }, + "properties": { + "test_field_1": { + "type": "test_type_1" + }, + "test_field_2": { + "type": "test_type_2" + }, + "test_field_3": { + "type": "test_type_3" + } + } +} diff --git a/memory/build.gradle b/memory/build.gradle index b6198509d0..8251303158 100644 --- a/memory/build.gradle +++ b/memory/build.gradle @@ -37,6 +37,7 @@ dependencies { testImplementation "org.opensearch.test:framework:${opensearch_version}" testImplementation "org.opensearch.client:opensearch-rest-client:${opensearch_version}" testImplementation group: 'com.google.code.gson', name: 'gson', version: '2.10.1' + testImplementation group: 'org.json', name: 'json', version: '20231013' } test { diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java b/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java index e6a15ecdae..9efe9372b8 100644 --- a/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java +++ b/ml-algorithms/src/main/java/org/opensearch/ml/engine/algorithms/metrics_correlation/MetricsCorrelation.java @@ -8,7 +8,6 @@ import static org.opensearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE; import static org.opensearch.index.query.QueryBuilders.termQuery; import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX; -import static org.opensearch.ml.common.CommonValue.ML_MODEL_GROUP_INDEX_MAPPING; import static org.opensearch.ml.common.CommonValue.ML_MODEL_INDEX; import static org.opensearch.ml.common.MLModel.MODEL_STATE_FIELD; @@ -40,6 +39,7 @@ import org.opensearch.ml.common.AccessMode; import org.opensearch.ml.common.CommonValue; import org.opensearch.ml.common.FunctionName; +import org.opensearch.ml.common.MLIndex; import org.opensearch.ml.common.MLModel; import org.opensearch.ml.common.MLModelGroup; import org.opensearch.ml.common.MLTask; @@ -131,7 +131,7 @@ public void execute(Input input, ActionListener