From c0a589c744f0a789893ad959fcee15b711e9dc5d Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Mon, 8 Nov 2021 23:02:16 -0800 Subject: [PATCH 01/10] Add initialized library stats Signed-off-by: John Mazanec --- .../opensearch/knn/index/util/KNNEngine.java | 13 +++ .../opensearch/knn/index/util/KNNLibrary.java | 27 +++++ .../org/opensearch/knn/jni/FaissService.java | 2 + .../org/opensearch/knn/jni/NmslibService.java | 2 + .../knn/plugin/stats/KNNStatsConfig.java | 6 ++ .../knn/plugin/stats/StatNames.java | 2 + .../suppliers/LibraryInitializedSupplier.java | 37 +++++++ .../LibraryInitializedSupplierTests.java | 100 ++++++++++++++++++ 8 files changed, 189 insertions(+) create mode 100644 src/main/java/org/opensearch/knn/plugin/stats/suppliers/LibraryInitializedSupplier.java create mode 100644 src/test/java/org/opensearch/knn/plugin/stats/suppliers/LibraryInitializedSupplierTests.java diff --git a/src/main/java/org/opensearch/knn/index/util/KNNEngine.java b/src/main/java/org/opensearch/knn/index/util/KNNEngine.java index e3a7734e1..26fe87cb6 100644 --- a/src/main/java/org/opensearch/knn/index/util/KNNEngine.java +++ b/src/main/java/org/opensearch/knn/index/util/KNNEngine.java @@ -17,6 +17,7 @@ import org.opensearch.knn.index.SpaceType; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; import static org.opensearch.knn.common.KNNConstants.FAISS_NAME; import static org.opensearch.knn.common.KNNConstants.NMSLIB_NAME; @@ -41,9 +42,11 @@ public enum KNNEngine implements KNNLibrary { KNNEngine(String name, KNNLibrary knnLibrary) { this.name = name; this.knnLibrary = knnLibrary; + this.initialized = new AtomicBoolean(false); } private String name; + private AtomicBoolean initialized; private KNNLibrary knnLibrary; /** @@ -143,4 +146,14 @@ public Map getMethodAsMap(KNNMethodContext knnMethodContext) { public int estimateOverheadInKB(KNNMethodContext knnMethodContext, int dimension) { return knnLibrary.estimateOverheadInKB(knnMethodContext, dimension); } + + @Override + public Boolean isInitialized() { + return knnLibrary.isInitialized(); + } + + @Override + public void setInitialized(Boolean isInitialized) { + knnLibrary.setInitialized(isInitialized); + } } diff --git a/src/main/java/org/opensearch/knn/index/util/KNNLibrary.java b/src/main/java/org/opensearch/knn/index/util/KNNLibrary.java index 90e9cafc2..04866140f 100644 --- a/src/main/java/org/opensearch/knn/index/util/KNNLibrary.java +++ b/src/main/java/org/opensearch/knn/index/util/KNNLibrary.java @@ -26,6 +26,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Function; import static org.opensearch.knn.common.KNNConstants.BYTES_PER_KILOBYTES; @@ -138,6 +139,20 @@ public interface KNNLibrary { */ Map getMethodAsMap(KNNMethodContext knnMethodContext); + /** + * Getter for initialized + * + * @return whether library has been initialized + */ + Boolean isInitialized(); + + /** + * Set initialized to true + * + * @param isInitialized whether library has been initialized + */ + void setInitialized(Boolean isInitialized); + /** * Abstract implementation of KNNLibrary. It contains several default methods and fields that * are common across different underlying libraries. @@ -148,6 +163,7 @@ abstract class NativeLibrary implements KNNLibrary { private String latestLibraryBuildVersion; private String latestLibraryVersion; private String extension; + private AtomicBoolean initialized; /** * Constructor for NativeLibrary @@ -166,6 +182,7 @@ public NativeLibrary(Map methods, Map getMethodAsMap(KNNMethodContext knnMethodContext) { return knnMethod.getAsMap(knnMethodContext); } + + @Override + public Boolean isInitialized() { + return initialized.get(); + } + + @Override + public void setInitialized(Boolean isInitialized) { + initialized.set(isInitialized); + } } /** diff --git a/src/main/java/org/opensearch/knn/jni/FaissService.java b/src/main/java/org/opensearch/knn/jni/FaissService.java index ab1e9ee3e..5287bc603 100644 --- a/src/main/java/org/opensearch/knn/jni/FaissService.java +++ b/src/main/java/org/opensearch/knn/jni/FaissService.java @@ -13,6 +13,7 @@ import org.opensearch.knn.common.KNNConstants; import org.opensearch.knn.index.KNNQueryResult; +import org.opensearch.knn.index.util.KNNEngine; import java.security.AccessController; import java.security.PrivilegedAction; @@ -34,6 +35,7 @@ class FaissService { initLibrary(); return null; }); + KNNEngine.FAISS.setInitialized(true); } /** diff --git a/src/main/java/org/opensearch/knn/jni/NmslibService.java b/src/main/java/org/opensearch/knn/jni/NmslibService.java index c9154bc1d..8429b3de0 100644 --- a/src/main/java/org/opensearch/knn/jni/NmslibService.java +++ b/src/main/java/org/opensearch/knn/jni/NmslibService.java @@ -13,6 +13,7 @@ import org.opensearch.knn.common.KNNConstants; import org.opensearch.knn.index.KNNQueryResult; +import org.opensearch.knn.index.util.KNNEngine; import java.security.AccessController; import java.security.PrivilegedAction; @@ -34,6 +35,7 @@ class NmslibService { initLibrary(); return null; }); + KNNEngine.NMSLIB.setInitialized(true); } /** diff --git a/src/main/java/org/opensearch/knn/plugin/stats/KNNStatsConfig.java b/src/main/java/org/opensearch/knn/plugin/stats/KNNStatsConfig.java index 1115227a5..62d79f573 100644 --- a/src/main/java/org/opensearch/knn/plugin/stats/KNNStatsConfig.java +++ b/src/main/java/org/opensearch/knn/plugin/stats/KNNStatsConfig.java @@ -29,8 +29,10 @@ import com.google.common.collect.ImmutableMap; import org.opensearch.knn.common.KNNConstants; import org.opensearch.knn.index.memory.NativeMemoryCacheManager; +import org.opensearch.knn.index.util.KNNEngine; import org.opensearch.knn.indices.ModelCache; import org.opensearch.knn.indices.ModelDao; +import org.opensearch.knn.plugin.stats.suppliers.LibraryInitializedSupplier; import org.opensearch.knn.plugin.stats.suppliers.EventOccurredWithinThresholdSupplier; import org.opensearch.knn.plugin.stats.suppliers.KNNCircuitBreakerSupplier; import org.opensearch.knn.plugin.stats.suppliers.KNNCounterSupplier; @@ -91,5 +93,9 @@ public class KNNStatsConfig { new ModelIndexingDegradingSupplier(ModelCache::getEvictedDueToSizeAt), KNNConstants.MODEL_CACHE_CAPACITY_ATROPHY_THRESHOLD_IN_MINUTES, ChronoUnit.MINUTES))) + .put(StatNames.FAISS_LOADED.getName(), new KNNStat<>(false, + new LibraryInitializedSupplier(KNNEngine.FAISS))) + .put(StatNames.NMSLIB_LOADED.getName(), new KNNStat<>(false, + new LibraryInitializedSupplier(KNNEngine.NMSLIB))) .build(); } diff --git a/src/main/java/org/opensearch/knn/plugin/stats/StatNames.java b/src/main/java/org/opensearch/knn/plugin/stats/StatNames.java index f653a0f91..8818b0a92 100644 --- a/src/main/java/org/opensearch/knn/plugin/stats/StatNames.java +++ b/src/main/java/org/opensearch/knn/plugin/stats/StatNames.java @@ -44,6 +44,8 @@ public enum StatNames { INDICES_IN_CACHE("indices_in_cache"), CIRCUIT_BREAKER_TRIGGERED("circuit_breaker_triggered"), MODEL_INDEX_STATUS("model_index_status"), + FAISS_LOADED("faiss_initialized"), + NMSLIB_LOADED("nmslib_initialized"), INDEXING_FROM_MODEL_DEGRADED("indexing_from_model_degraded"), GRAPH_QUERY_ERRORS(KNNCounter.GRAPH_QUERY_ERRORS.getName()), GRAPH_QUERY_REQUESTS(KNNCounter.GRAPH_QUERY_REQUESTS.getName()), diff --git a/src/main/java/org/opensearch/knn/plugin/stats/suppliers/LibraryInitializedSupplier.java b/src/main/java/org/opensearch/knn/plugin/stats/suppliers/LibraryInitializedSupplier.java new file mode 100644 index 000000000..9e0d1a315 --- /dev/null +++ b/src/main/java/org/opensearch/knn/plugin/stats/suppliers/LibraryInitializedSupplier.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.knn.plugin.stats.suppliers; + +import org.opensearch.knn.index.util.KNNLibrary; + +import java.util.function.Supplier; + +/** + * Supplier to determine whether library has been initialized + */ +public class LibraryInitializedSupplier implements Supplier { + private KNNLibrary knnLibrary; + + /** + * Constructor + * + * @param knnLibrary to check if initialized + */ + public LibraryInitializedSupplier(KNNLibrary knnLibrary) { + this.knnLibrary = knnLibrary; + } + + @Override + public Boolean get() { + return knnLibrary.isInitialized(); + } +} \ No newline at end of file diff --git a/src/test/java/org/opensearch/knn/plugin/stats/suppliers/LibraryInitializedSupplierTests.java b/src/test/java/org/opensearch/knn/plugin/stats/suppliers/LibraryInitializedSupplierTests.java new file mode 100644 index 000000000..1d942ee44 --- /dev/null +++ b/src/test/java/org/opensearch/knn/plugin/stats/suppliers/LibraryInitializedSupplierTests.java @@ -0,0 +1,100 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.knn.plugin.stats.suppliers; + +import org.opensearch.common.ValidationException; +import org.opensearch.knn.index.KNNMethod; +import org.opensearch.knn.index.KNNMethodContext; +import org.opensearch.knn.index.SpaceType; +import org.opensearch.knn.index.util.KNNLibrary; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.Map; + +public class LibraryInitializedSupplierTests extends OpenSearchTestCase { + + public void testEngineInitialized() { + KNNLibrary knnLibrary = new TestLibrary(); + LibraryInitializedSupplier libraryInitializedSupplier = new LibraryInitializedSupplier(knnLibrary); + knnLibrary.setInitialized(false); + assertFalse(libraryInitializedSupplier.get()); + knnLibrary.setInitialized(true); + assertTrue(libraryInitializedSupplier.get()); + } + + private class TestLibrary implements KNNLibrary { + private Boolean initialized; + TestLibrary() { + this.initialized = false; + } + + @Override + public String getLatestBuildVersion() { + return null; + } + + @Override + public String getLatestLibVersion() { + return null; + } + + @Override + public String getExtension() { + return null; + } + + @Override + public String getCompoundExtension() { + return null; + } + + @Override + public KNNMethod getMethod(String methodName) { + return null; + } + + @Override + public float score(float rawScore, SpaceType spaceType) { + return 0; + } + + @Override + public ValidationException validateMethod(KNNMethodContext knnMethodContext) { + return null; + } + + @Override + public boolean isTrainingRequired(KNNMethodContext knnMethodContext) { + return false; + } + + @Override + public int estimateOverheadInKB(KNNMethodContext knnMethodContext, int dimension) { + return 0; + } + + @Override + public Map getMethodAsMap(KNNMethodContext knnMethodContext) { + return null; + } + + @Override + public Boolean isInitialized() { + return initialized; + } + + @Override + public void setInitialized(Boolean isInitialized) { + this.initialized = isInitialized; + } + } +} From 1ac64a55fd286cfa23ed511557c9f4074d43ee14 Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Tue, 9 Nov 2021 09:23:24 -0800 Subject: [PATCH 02/10] Add counters for training requests and errors Signed-off-by: John Mazanec --- .../org/opensearch/knn/plugin/stats/KNNCounter.java | 4 +++- .../opensearch/knn/plugin/stats/KNNStatsConfig.java | 4 ++++ .../org/opensearch/knn/plugin/stats/StatNames.java | 2 ++ .../transport/TrainingJobRouterTransportAction.java | 10 ++++++++-- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/opensearch/knn/plugin/stats/KNNCounter.java b/src/main/java/org/opensearch/knn/plugin/stats/KNNCounter.java index 46ee524d5..c8ccfe530 100644 --- a/src/main/java/org/opensearch/knn/plugin/stats/KNNCounter.java +++ b/src/main/java/org/opensearch/knn/plugin/stats/KNNCounter.java @@ -39,7 +39,9 @@ public enum KNNCounter { SCRIPT_COMPILATIONS("script_compilations"), SCRIPT_COMPILATION_ERRORS("script_compilation_errors"), SCRIPT_QUERY_REQUESTS("script_query_requests"), - SCRIPT_QUERY_ERRORS("script_query_errors"); + SCRIPT_QUERY_ERRORS("script_query_errors"), + TRAINING_REQUESTS("training_requests"), + TRAINING_ERRORS("training_errors"); private String name; private AtomicLong count; diff --git a/src/main/java/org/opensearch/knn/plugin/stats/KNNStatsConfig.java b/src/main/java/org/opensearch/knn/plugin/stats/KNNStatsConfig.java index 62d79f573..335faa92a 100644 --- a/src/main/java/org/opensearch/knn/plugin/stats/KNNStatsConfig.java +++ b/src/main/java/org/opensearch/knn/plugin/stats/KNNStatsConfig.java @@ -97,5 +97,9 @@ public class KNNStatsConfig { new LibraryInitializedSupplier(KNNEngine.FAISS))) .put(StatNames.NMSLIB_LOADED.getName(), new KNNStat<>(false, new LibraryInitializedSupplier(KNNEngine.NMSLIB))) + .put(StatNames.TRAINING_REQUESTS.getName(), new KNNStat<>(false, + new KNNCounterSupplier(KNNCounter.TRAINING_REQUESTS))) + .put(StatNames.TRAINING_ERRORS.getName(), new KNNStat<>(false, + new KNNCounterSupplier(KNNCounter.TRAINING_ERRORS))) .build(); } diff --git a/src/main/java/org/opensearch/knn/plugin/stats/StatNames.java b/src/main/java/org/opensearch/knn/plugin/stats/StatNames.java index 8818b0a92..f271fcdef 100644 --- a/src/main/java/org/opensearch/knn/plugin/stats/StatNames.java +++ b/src/main/java/org/opensearch/knn/plugin/stats/StatNames.java @@ -55,6 +55,8 @@ public enum StatNames { SCRIPT_COMPILATIONS(KNNCounter.SCRIPT_COMPILATIONS.getName()), SCRIPT_COMPILATION_ERRORS(KNNCounter.SCRIPT_COMPILATION_ERRORS.getName()), SCRIPT_QUERY_REQUESTS(KNNCounter.SCRIPT_QUERY_REQUESTS.getName()), + TRAINING_REQUESTS(KNNCounter.TRAINING_REQUESTS.getName()), + TRAINING_ERRORS(KNNCounter.TRAINING_ERRORS.getName()), SCRIPT_QUERY_ERRORS(KNNCounter.SCRIPT_QUERY_ERRORS.getName()); private String name; diff --git a/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java b/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java index 0a24c0c47..ec9a9a121 100644 --- a/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java +++ b/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java @@ -23,11 +23,13 @@ import org.opensearch.common.ValidationException; import org.opensearch.common.collect.ImmutableOpenMap; import org.opensearch.common.inject.Inject; +import org.opensearch.knn.plugin.stats.KNNCounter; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.tasks.Task; import org.opensearch.transport.TransportRequestOptions; import org.opensearch.transport.TransportService; +import javax.swing.*; import java.util.concurrent.RejectedExecutionException; import static org.opensearch.knn.common.KNNConstants.BYTES_PER_KILOBYTES; @@ -58,10 +60,14 @@ protected void doExecute(Task task, TrainingModelRequest request, // Get the size of the training request and then route the request. We get/set this here, as opposed to in // TrainingModelTransportAction, because in the future, we may want to use size to factor into our routing // decision. + KNNCounter.TRAINING_REQUESTS.increment(); + ActionListener wrappedListener = ActionListener.wrap(listener::onResponse, ex -> { + KNNCounter.TRAINING_ERRORS.increment(); + }); getTrainingIndexSizeInKB(request, ActionListener.wrap(size -> { request.setTrainingDataSizeInKB(size); - routeRequest(request, listener); - }, listener::onFailure)); + routeRequest(request, wrappedListener); + }, wrappedListener::onFailure)); } protected void routeRequest(TrainingModelRequest request, ActionListener listener) { From 7827de8881c11bd2b8cb9b2fc76bb11be624a5e2 Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Tue, 9 Nov 2021 09:47:41 -0800 Subject: [PATCH 03/10] Add training memory usage stats Signed-off-by: John Mazanec --- .../memory/NativeMemoryCacheManager.java | 65 ++++++++++++++++--- .../knn/plugin/stats/KNNStatsConfig.java | 8 ++- .../knn/plugin/stats/StatNames.java | 2 + .../suppliers/LibraryInitializedSupplier.java | 2 +- .../memory/NativeMemoryCacheManagerTests.java | 34 ++++++++++ 5 files changed, 98 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/opensearch/knn/index/memory/NativeMemoryCacheManager.java b/src/main/java/org/opensearch/knn/index/memory/NativeMemoryCacheManager.java index 18635fa00..5ea430293 100644 --- a/src/main/java/org/opensearch/knn/index/memory/NativeMemoryCacheManager.java +++ b/src/main/java/org/opensearch/knn/index/memory/NativeMemoryCacheManager.java @@ -118,16 +118,22 @@ public long getCacheSizeInKilobytes() { } /** - * Returns the current size of an index in the cache in KiloBytes. + * Returns how full the cache is as a percentage of the total cache capacity. * - * @param indexName Name if index to get the weight for - * @return Size of the index in the cache in kilobytes + * @return Percentage of the cache full */ - public Long getIndexSizeInKilobytes(final String indexName) { - Validate.notNull(indexName, "Index name cannot be null"); + public Float getCacheSizeAsPercentage() { + return 100 * getCacheSizeInKilobytes() / (float) KNNSettings.getCircuitBreakerLimit().getKb(); + } + + /** + * Getter for current size of all indices in Kilobytes. + * + * @return current size of the cache + */ + public long getIndicesSizeInKilobytes() { return cache.asMap().values().stream() .filter(nativeMemoryAllocation -> nativeMemoryAllocation instanceof NativeMemoryAllocation.IndexAllocation) - .filter(indexAllocation -> indexName.equals(((NativeMemoryAllocation.IndexAllocation) indexAllocation).getOpenSearchIndexName())) .mapToLong(NativeMemoryAllocation::getSizeInKB) .sum(); } @@ -137,8 +143,23 @@ public Long getIndexSizeInKilobytes(final String indexName) { * * @return Percentage of the cache full */ - public Float getCacheSizeAsPercentage() { - return 100 * getCacheSizeInKilobytes() / (float) KNNSettings.getCircuitBreakerLimit().getKb(); + public Float getIndicesSizeAsPercentage() { + return 100 * getIndicesSizeInKilobytes() / (float) KNNSettings.getCircuitBreakerLimit().getKb(); + } + + /** + * Returns the current size of an index in the cache in KiloBytes. + * + * @param indexName Name if index to get the weight for + * @return Size of the index in the cache in kilobytes + */ + public Long getIndexSizeInKilobytes(final String indexName) { + Validate.notNull(indexName, "Index name cannot be null"); + return cache.asMap().values().stream() + .filter(nativeMemoryAllocation -> nativeMemoryAllocation instanceof NativeMemoryAllocation.IndexAllocation) + .filter(indexAllocation -> indexName.equals(((NativeMemoryAllocation.IndexAllocation) indexAllocation).getOpenSearchIndexName())) + .mapToLong(NativeMemoryAllocation::getSizeInKB) + .sum(); } /** @@ -152,6 +173,30 @@ public Float getIndexSizeAsPercentage(final String indexName) { return 100 * getIndexSizeInKilobytes(indexName) / (float) KNNSettings.getCircuitBreakerLimit().getKb(); } + /** + * Getter for current size of all training jobs in Kilobytes. + * + * @return current size of the cache + */ + public long getTrainingSizeInKilobytes() { + // Currently, all allocations that are not index allocations will be for training. + return cache.asMap().values().stream() + .filter(nativeMemoryAllocation -> + nativeMemoryAllocation instanceof NativeMemoryAllocation.TrainingDataAllocation || + nativeMemoryAllocation instanceof NativeMemoryAllocation.AnonymousAllocation) + .mapToLong(NativeMemoryAllocation::getSizeInKB) + .sum(); + } + + /** + * Returns how full the cache is as a percentage of the total cache capacity. + * + * @return Percentage of the cache full + */ + public Float getTrainingSizeAsPercentage() { + return 100 * getTrainingSizeInKilobytes() / (float) KNNSettings.getCircuitBreakerLimit().getKb(); + } + /** * Getter for maximum weight of the cache. * @@ -245,9 +290,9 @@ public void setCacheCapacityReached(Boolean value) { } /** - * Get the stats of all of the Elasticsearch indices currently loaded into the cache + * Get the stats of all of the OpenSearch indices currently loaded into the cache * - * @return Map containing all of the Elasticsearch indices in the cache and their stats + * @return Map containing all of the OpenSearch indices in the cache and their stats */ public Map> getIndicesCacheStats() { Map> statValues = new HashMap<>(); diff --git a/src/main/java/org/opensearch/knn/plugin/stats/KNNStatsConfig.java b/src/main/java/org/opensearch/knn/plugin/stats/KNNStatsConfig.java index 335faa92a..80decaadb 100644 --- a/src/main/java/org/opensearch/knn/plugin/stats/KNNStatsConfig.java +++ b/src/main/java/org/opensearch/knn/plugin/stats/KNNStatsConfig.java @@ -59,9 +59,9 @@ public class KNNStatsConfig { .put(StatNames.EVICTION_COUNT.getName(), new KNNStat<>(false, new KNNInnerCacheStatsSupplier(CacheStats::evictionCount))) .put(StatNames.GRAPH_MEMORY_USAGE.getName(), new KNNStat<>(false, - new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getCacheSizeInKilobytes))) + new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getIndicesSizeInKilobytes))) .put(StatNames.GRAPH_MEMORY_USAGE_PERCENTAGE.getName(), new KNNStat<>(false, - new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getCacheSizeAsPercentage))) + new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getIndicesSizeAsPercentage))) .put(StatNames.INDICES_IN_CACHE.getName(), new KNNStat<>(false, new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getIndicesCacheStats))) .put(StatNames.CACHE_CAPACITY_REACHED.getName(), new KNNStat<>(false, @@ -101,5 +101,9 @@ public class KNNStatsConfig { new KNNCounterSupplier(KNNCounter.TRAINING_REQUESTS))) .put(StatNames.TRAINING_ERRORS.getName(), new KNNStat<>(false, new KNNCounterSupplier(KNNCounter.TRAINING_ERRORS))) + .put(StatNames.TRAINING_MEMORY_USAGE.getName(), new KNNStat<>(false, + new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getTrainingSizeInKilobytes))) + .put(StatNames.TRAINING_MEMORY_USAGE_PERCENTAGE.getName(), new KNNStat<>(false, + new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getTrainingSizeAsPercentage))) .build(); } diff --git a/src/main/java/org/opensearch/knn/plugin/stats/StatNames.java b/src/main/java/org/opensearch/knn/plugin/stats/StatNames.java index f271fcdef..b535e01f3 100644 --- a/src/main/java/org/opensearch/knn/plugin/stats/StatNames.java +++ b/src/main/java/org/opensearch/knn/plugin/stats/StatNames.java @@ -57,6 +57,8 @@ public enum StatNames { SCRIPT_QUERY_REQUESTS(KNNCounter.SCRIPT_QUERY_REQUESTS.getName()), TRAINING_REQUESTS(KNNCounter.TRAINING_REQUESTS.getName()), TRAINING_ERRORS(KNNCounter.TRAINING_ERRORS.getName()), + TRAINING_MEMORY_USAGE("training_memory_usage"), + TRAINING_MEMORY_USAGE_PERCENTAGE("training_memory_usage_percentage"), SCRIPT_QUERY_ERRORS(KNNCounter.SCRIPT_QUERY_ERRORS.getName()); private String name; diff --git a/src/main/java/org/opensearch/knn/plugin/stats/suppliers/LibraryInitializedSupplier.java b/src/main/java/org/opensearch/knn/plugin/stats/suppliers/LibraryInitializedSupplier.java index 9e0d1a315..800c1a827 100644 --- a/src/main/java/org/opensearch/knn/plugin/stats/suppliers/LibraryInitializedSupplier.java +++ b/src/main/java/org/opensearch/knn/plugin/stats/suppliers/LibraryInitializedSupplier.java @@ -34,4 +34,4 @@ public LibraryInitializedSupplier(KNNLibrary knnLibrary) { public Boolean get() { return knnLibrary.isInitialized(); } -} \ No newline at end of file +} diff --git a/src/test/java/org/opensearch/knn/index/memory/NativeMemoryCacheManagerTests.java b/src/test/java/org/opensearch/knn/index/memory/NativeMemoryCacheManagerTests.java index f912dc6e1..079e41e56 100644 --- a/src/test/java/org/opensearch/knn/index/memory/NativeMemoryCacheManagerTests.java +++ b/src/test/java/org/opensearch/knn/index/memory/NativeMemoryCacheManagerTests.java @@ -173,6 +173,40 @@ public void testGetIndexSizeAsPercentage() throws ExecutionException, IOExceptio nativeMemoryCacheManager.close(); } + public void testGetTrainingSize() throws ExecutionException { + NativeMemoryCacheManager nativeMemoryCacheManager = new NativeMemoryCacheManager(); + long maxWeight = nativeMemoryCacheManager.getMaxCacheSizeInKilobytes(); + int genericEntryWeight = (int) (maxWeight / 3); + int allocationEntryWeight = (int) (maxWeight / 3); + + TestNativeMemoryEntryContent testNativeMemoryEntryContent = new TestNativeMemoryEntryContent( + "test-1", genericEntryWeight, 0); + + nativeMemoryCacheManager.get(testNativeMemoryEntryContent, true); + + String indexName = "test-index"; + String key = "test-key"; + + NativeMemoryAllocation.TrainingDataAllocation trainingDataAllocation = new NativeMemoryAllocation.TrainingDataAllocation( + null, + 0, + allocationEntryWeight + ); + + NativeMemoryEntryContext.TrainingDataEntryContext trainingDataEntryContext = + mock(NativeMemoryEntryContext.TrainingDataEntryContext.class); + when(trainingDataEntryContext.load()).thenReturn(trainingDataAllocation); + when(trainingDataEntryContext.getKey()).thenReturn(key); + + nativeMemoryCacheManager.get(trainingDataEntryContext, true); + + assertEquals((float) allocationEntryWeight, nativeMemoryCacheManager.getTrainingSizeInKilobytes(), 0.001); + assertEquals(100 * (float) allocationEntryWeight / (float) maxWeight, + nativeMemoryCacheManager.getTrainingSizeAsPercentage(), 0.001); + + nativeMemoryCacheManager.close(); + } + public void testGetIndexGraphCount() throws ExecutionException, IOException { NativeMemoryCacheManager nativeMemoryCacheManager = new NativeMemoryCacheManager(); long maxWeight = nativeMemoryCacheManager.getMaxCacheSizeInKilobytes(); From 99bdcdb347f0d0b1ca27e982ba10abdd369b8092 Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Tue, 9 Nov 2021 10:12:13 -0800 Subject: [PATCH 04/10] Add forgotten listener call Signed-off-by: John Mazanec --- .../knn/plugin/transport/TrainingJobRouterTransportAction.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java b/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java index ec9a9a121..b2dcbf986 100644 --- a/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java +++ b/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java @@ -63,6 +63,7 @@ protected void doExecute(Task task, TrainingModelRequest request, KNNCounter.TRAINING_REQUESTS.increment(); ActionListener wrappedListener = ActionListener.wrap(listener::onResponse, ex -> { KNNCounter.TRAINING_ERRORS.increment(); + listener.onFailure(ex); }); getTrainingIndexSizeInKB(request, ActionListener.wrap(size -> { request.setTrainingDataSizeInKB(size); From 68f9e73c6a0fef34123916c34900713234898bc4 Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Tue, 9 Nov 2021 10:22:51 -0800 Subject: [PATCH 05/10] Move training counters Signed-off-by: John Mazanec --- .../TrainingJobRouterTransportAction.java | 9 ++------- .../transport/TrainingModelTransportAction.java | 14 +++++++++++--- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java b/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java index b2dcbf986..8016b1333 100644 --- a/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java +++ b/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java @@ -60,15 +60,10 @@ protected void doExecute(Task task, TrainingModelRequest request, // Get the size of the training request and then route the request. We get/set this here, as opposed to in // TrainingModelTransportAction, because in the future, we may want to use size to factor into our routing // decision. - KNNCounter.TRAINING_REQUESTS.increment(); - ActionListener wrappedListener = ActionListener.wrap(listener::onResponse, ex -> { - KNNCounter.TRAINING_ERRORS.increment(); - listener.onFailure(ex); - }); getTrainingIndexSizeInKB(request, ActionListener.wrap(size -> { request.setTrainingDataSizeInKB(size); - routeRequest(request, wrappedListener); - }, wrappedListener::onFailure)); + routeRequest(request, listener); + }, listener::onFailure)); } protected void routeRequest(TrainingModelRequest request, ActionListener listener) { diff --git a/src/main/java/org/opensearch/knn/plugin/transport/TrainingModelTransportAction.java b/src/main/java/org/opensearch/knn/plugin/transport/TrainingModelTransportAction.java index eac9c0c45..2645ddba3 100644 --- a/src/main/java/org/opensearch/knn/plugin/transport/TrainingModelTransportAction.java +++ b/src/main/java/org/opensearch/knn/plugin/transport/TrainingModelTransportAction.java @@ -19,6 +19,7 @@ import org.opensearch.knn.index.memory.NativeMemoryCacheManager; import org.opensearch.knn.index.memory.NativeMemoryEntryContext; import org.opensearch.knn.index.memory.NativeMemoryLoadStrategy; +import org.opensearch.knn.plugin.stats.KNNCounter; import org.opensearch.knn.training.TrainingJob; import org.opensearch.knn.training.TrainingJobRunner; import org.opensearch.tasks.Task; @@ -44,6 +45,7 @@ public TrainingModelTransportAction(TransportService transportService, @Override protected void doExecute(Task task, TrainingModelRequest request, ActionListener listener) { + NativeMemoryEntryContext.TrainingDataEntryContext trainingDataEntryContext = new NativeMemoryEntryContext.TrainingDataEntryContext( request.getTrainingDataSizeInKB(), @@ -72,13 +74,19 @@ protected void doExecute(Task task, TrainingModelRequest request, request.getDescription() ); + KNNCounter.TRAINING_REQUESTS.increment(); + ActionListener wrappedListener = ActionListener.wrap(listener::onResponse, ex -> { + KNNCounter.TRAINING_ERRORS.increment(); + listener.onFailure(ex); + }); + try { TrainingJobRunner.getInstance().execute(trainingJob, ActionListener.wrap( - indexResponse -> listener.onResponse(new TrainingModelResponse(indexResponse.getId())), - listener::onFailure) + indexResponse -> wrappedListener.onResponse(new TrainingModelResponse(indexResponse.getId())), + wrappedListener::onFailure) ); } catch (IOException e) { - listener.onFailure(e); + wrappedListener.onFailure(e); } } } From bb8ec21ebde87666711fd406814e2d9802cf9f2e Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Tue, 9 Nov 2021 10:41:37 -0800 Subject: [PATCH 06/10] Increment error count Signed-off-by: John Mazanec --- .../opensearch/knn/training/TrainingJobRunner.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/opensearch/knn/training/TrainingJobRunner.java b/src/main/java/org/opensearch/knn/training/TrainingJobRunner.java index 302b5d8a5..77fdd2dc1 100644 --- a/src/main/java/org/opensearch/knn/training/TrainingJobRunner.java +++ b/src/main/java/org/opensearch/knn/training/TrainingJobRunner.java @@ -19,6 +19,7 @@ import org.opensearch.knn.indices.ModelDao; import org.opensearch.knn.indices.ModelMetadata; import org.opensearch.knn.indices.ModelState; +import org.opensearch.knn.plugin.stats.KNNCounter; import org.opensearch.threadpool.ThreadPool; import java.io.IOException; @@ -85,6 +86,7 @@ public void execute(TrainingJob trainingJob, ActionListener liste if (!semaphore.tryAcquire()) { ValidationException exception = new ValidationException(); exception.addValidationError("Unable to run training job: No training capacity on node."); + KNNCounter.TRAINING_ERRORS.increment(); throw exception; } @@ -125,8 +127,11 @@ private void train(TrainingJob trainingJob) { ActionListener loggingListener = ActionListener.wrap( indexResponse -> logger.debug("[KNN] Model serialization update for \"" + trainingJob.getModelId() + "\" was successful"), - e -> logger.error("[KNN] Model serialization update for \"" + trainingJob.getModelId() + - "\" failed: " + e.getMessage()) + e -> { + logger.error("[KNN] Model serialization update for \"" + trainingJob.getModelId() + + "\" failed: " + e.getMessage()); + KNNCounter.TRAINING_ERRORS.increment(); + } ); try { @@ -136,9 +141,11 @@ private void train(TrainingJob trainingJob) { serializeModel(trainingJob, loggingListener, true); } catch (IOException e) { logger.error("Unable to serialize model \"" + trainingJob.getModelId() + "\": " + e.getMessage()); + KNNCounter.TRAINING_ERRORS.increment(); } catch (Exception e) { logger.error("Unable to complete training for \"" + trainingJob.getModelId() + "\": " + e.getMessage()); + KNNCounter.TRAINING_ERRORS.increment(); } finally { jobCount.decrementAndGet(); semaphore.release(); @@ -158,6 +165,7 @@ private void train(TrainingJob trainingJob) { } finally { jobCount.decrementAndGet(); semaphore.release(); + KNNCounter.TRAINING_ERRORS.increment(); } } } From 2c2f25974107111d721b89f128fb2b7092f19e82 Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Tue, 9 Nov 2021 10:45:01 -0800 Subject: [PATCH 07/10] Move initialized into priv block Signed-off-by: John Mazanec --- src/main/java/org/opensearch/knn/jni/FaissService.java | 2 +- src/main/java/org/opensearch/knn/jni/NmslibService.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/opensearch/knn/jni/FaissService.java b/src/main/java/org/opensearch/knn/jni/FaissService.java index 5287bc603..09be29a6a 100644 --- a/src/main/java/org/opensearch/knn/jni/FaissService.java +++ b/src/main/java/org/opensearch/knn/jni/FaissService.java @@ -33,9 +33,9 @@ class FaissService { AccessController.doPrivileged((PrivilegedAction) () -> { System.loadLibrary(KNNConstants.FAISS_JNI_LIBRARY_NAME); initLibrary(); + KNNEngine.FAISS.setInitialized(true); return null; }); - KNNEngine.FAISS.setInitialized(true); } /** diff --git a/src/main/java/org/opensearch/knn/jni/NmslibService.java b/src/main/java/org/opensearch/knn/jni/NmslibService.java index 8429b3de0..b02cdca0f 100644 --- a/src/main/java/org/opensearch/knn/jni/NmslibService.java +++ b/src/main/java/org/opensearch/knn/jni/NmslibService.java @@ -33,9 +33,9 @@ class NmslibService { AccessController.doPrivileged((PrivilegedAction) () -> { System.loadLibrary(KNNConstants.NMSLIB_JNI_LIBRARY_NAME); initLibrary(); + KNNEngine.NMSLIB.setInitialized(true); return null; }); - KNNEngine.NMSLIB.setInitialized(true); } /** From 3faafcb502b4c6a3d577a3b1e729d3198317e420 Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Tue, 9 Nov 2021 10:49:24 -0800 Subject: [PATCH 08/10] Fix divide by 0 Signed-off-by: John Mazanec --- .../index/memory/NativeMemoryCacheManager.java | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/opensearch/knn/index/memory/NativeMemoryCacheManager.java b/src/main/java/org/opensearch/knn/index/memory/NativeMemoryCacheManager.java index 5ea430293..5672b4a94 100644 --- a/src/main/java/org/opensearch/knn/index/memory/NativeMemoryCacheManager.java +++ b/src/main/java/org/opensearch/knn/index/memory/NativeMemoryCacheManager.java @@ -123,7 +123,7 @@ public long getCacheSizeInKilobytes() { * @return Percentage of the cache full */ public Float getCacheSizeAsPercentage() { - return 100 * getCacheSizeInKilobytes() / (float) KNNSettings.getCircuitBreakerLimit().getKb(); + return getSizeAsPercentage(getCacheSizeInKilobytes()); } /** @@ -144,7 +144,7 @@ public long getIndicesSizeInKilobytes() { * @return Percentage of the cache full */ public Float getIndicesSizeAsPercentage() { - return 100 * getIndicesSizeInKilobytes() / (float) KNNSettings.getCircuitBreakerLimit().getKb(); + return getSizeAsPercentage(getIndicesSizeInKilobytes()); } /** @@ -170,7 +170,7 @@ public Long getIndexSizeInKilobytes(final String indexName) { */ public Float getIndexSizeAsPercentage(final String indexName) { Validate.notNull(indexName, "Index name cannot be null"); - return 100 * getIndexSizeInKilobytes(indexName) / (float) KNNSettings.getCircuitBreakerLimit().getKb(); + return getSizeAsPercentage(getIndexSizeInKilobytes(indexName)); } /** @@ -194,7 +194,7 @@ public long getTrainingSizeInKilobytes() { * @return Percentage of the cache full */ public Float getTrainingSizeAsPercentage() { - return 100 * getTrainingSizeInKilobytes() / (float) KNNSettings.getCircuitBreakerLimit().getKb(); + return getSizeAsPercentage(getTrainingSizeInKilobytes()); } /** @@ -329,4 +329,12 @@ private void onRemoval(RemovalNotification remov logger.debug("[KNN] Cache evicted. Key {}, Reason: {}", removalNotification.getKey(), removalNotification.getCause()); } + + private Float getSizeAsPercentage(long size) { + long cbLimit = KNNSettings.getCircuitBreakerLimit().getKb(); + if (cbLimit == 0) { + return 0.0F; + } + return 100 * size / (float) cbLimit; + } } From 1c3c154e2885994ba66d407649c8c72c52acc4ba Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Tue, 9 Nov 2021 11:24:19 -0800 Subject: [PATCH 09/10] Remove unneccesary initialized Signed-off-by: John Mazanec --- src/main/java/org/opensearch/knn/index/util/KNNEngine.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/main/java/org/opensearch/knn/index/util/KNNEngine.java b/src/main/java/org/opensearch/knn/index/util/KNNEngine.java index 26fe87cb6..8c7023542 100644 --- a/src/main/java/org/opensearch/knn/index/util/KNNEngine.java +++ b/src/main/java/org/opensearch/knn/index/util/KNNEngine.java @@ -17,7 +17,6 @@ import org.opensearch.knn.index.SpaceType; import java.util.Map; -import java.util.concurrent.atomic.AtomicBoolean; import static org.opensearch.knn.common.KNNConstants.FAISS_NAME; import static org.opensearch.knn.common.KNNConstants.NMSLIB_NAME; @@ -42,11 +41,9 @@ public enum KNNEngine implements KNNLibrary { KNNEngine(String name, KNNLibrary knnLibrary) { this.name = name; this.knnLibrary = knnLibrary; - this.initialized = new AtomicBoolean(false); } private String name; - private AtomicBoolean initialized; private KNNLibrary knnLibrary; /** From 47d84f39c876cbe59d87b277c4f7bfb7060588c7 Mon Sep 17 00:00:00 2001 From: John Mazanec Date: Tue, 9 Nov 2021 11:32:20 -0800 Subject: [PATCH 10/10] Remove unused imports Signed-off-by: John Mazanec --- .../knn/plugin/transport/TrainingJobRouterTransportAction.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java b/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java index 8016b1333..0a24c0c47 100644 --- a/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java +++ b/src/main/java/org/opensearch/knn/plugin/transport/TrainingJobRouterTransportAction.java @@ -23,13 +23,11 @@ import org.opensearch.common.ValidationException; import org.opensearch.common.collect.ImmutableOpenMap; import org.opensearch.common.inject.Inject; -import org.opensearch.knn.plugin.stats.KNNCounter; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.tasks.Task; import org.opensearch.transport.TransportRequestOptions; import org.opensearch.transport.TransportService; -import javax.swing.*; import java.util.concurrent.RejectedExecutionException; import static org.opensearch.knn.common.KNNConstants.BYTES_PER_KILOBYTES;