Skip to content

Commit

Permalink
Add training stats and library initialized stats (opensearch-project#191
Browse files Browse the repository at this point in the history
)

Signed-off-by: John Mazanec <[email protected]>
  • Loading branch information
jmazanec15 authored Nov 9, 2021
1 parent b786acf commit bbfb373
Show file tree
Hide file tree
Showing 13 changed files with 321 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,36 @@ public long getCacheSizeInKilobytes() {
return cache.asMap().values().stream().mapToLong(NativeMemoryAllocation::getSizeInKB).sum();
}

/**
* Returns how full the cache is as a percentage of the total cache capacity.
*
* @return Percentage of the cache full
*/
public Float getCacheSizeAsPercentage() {
return getSizeAsPercentage(getCacheSizeInKilobytes());
}

/**
* Getter for current size of all indices in Kilobytes.
*
* @return current size of the cache
*/
public long getIndicesSizeInKilobytes() {
return cache.asMap().values().stream()
.filter(nativeMemoryAllocation -> nativeMemoryAllocation instanceof NativeMemoryAllocation.IndexAllocation)
.mapToLong(NativeMemoryAllocation::getSizeInKB)
.sum();
}

/**
* Returns how full the cache is as a percentage of the total cache capacity.
*
* @return Percentage of the cache full
*/
public Float getIndicesSizeAsPercentage() {
return getSizeAsPercentage(getIndicesSizeInKilobytes());
}

/**
* Returns the current size of an index in the cache in KiloBytes.
*
Expand All @@ -133,23 +163,38 @@ public Long getIndexSizeInKilobytes(final String indexName) {
}

/**
* Returns how full the cache is as a percentage of the total cache capacity.
* Returns the how much space an index is taking up in the cache as a percentage of the total cache capacity.
*
* @param indexName name of the index
* @return Percentage of the cache full
*/
public Float getCacheSizeAsPercentage() {
return 100 * getCacheSizeInKilobytes() / (float) KNNSettings.getCircuitBreakerLimit().getKb();
public Float getIndexSizeAsPercentage(final String indexName) {
Validate.notNull(indexName, "Index name cannot be null");
return getSizeAsPercentage(getIndexSizeInKilobytes(indexName));
}

/**
* Returns the how much space an index is taking up in the cache as a percentage of the total cache capacity.
* Getter for current size of all training jobs in Kilobytes.
*
* @return current size of the cache
*/
public long getTrainingSizeInKilobytes() {
// Currently, all allocations that are not index allocations will be for training.
return cache.asMap().values().stream()
.filter(nativeMemoryAllocation ->
nativeMemoryAllocation instanceof NativeMemoryAllocation.TrainingDataAllocation ||
nativeMemoryAllocation instanceof NativeMemoryAllocation.AnonymousAllocation)
.mapToLong(NativeMemoryAllocation::getSizeInKB)
.sum();
}

/**
* Returns how full the cache is as a percentage of the total cache capacity.
*
* @param indexName name of the index
* @return Percentage of the cache full
*/
public Float getIndexSizeAsPercentage(final String indexName) {
Validate.notNull(indexName, "Index name cannot be null");
return 100 * getIndexSizeInKilobytes(indexName) / (float) KNNSettings.getCircuitBreakerLimit().getKb();
public Float getTrainingSizeAsPercentage() {
return getSizeAsPercentage(getTrainingSizeInKilobytes());
}

/**
Expand Down Expand Up @@ -245,9 +290,9 @@ public void setCacheCapacityReached(Boolean value) {
}

/**
* Get the stats of all of the Elasticsearch indices currently loaded into the cache
* Get the stats of all of the OpenSearch indices currently loaded into the cache
*
* @return Map containing all of the Elasticsearch indices in the cache and their stats
* @return Map containing all of the OpenSearch indices in the cache and their stats
*/
public Map<String, Map<String, Object>> getIndicesCacheStats() {
Map<String, Map<String, Object>> statValues = new HashMap<>();
Expand Down Expand Up @@ -284,4 +329,12 @@ private void onRemoval(RemovalNotification<String, NativeMemoryAllocation> remov
logger.debug("[KNN] Cache evicted. Key {}, Reason: {}", removalNotification.getKey(),
removalNotification.getCause());
}

private Float getSizeAsPercentage(long size) {
long cbLimit = KNNSettings.getCircuitBreakerLimit().getKb();
if (cbLimit == 0) {
return 0.0F;
}
return 100 * size / (float) cbLimit;
}
}
10 changes: 10 additions & 0 deletions src/main/java/org/opensearch/knn/index/util/KNNEngine.java
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,14 @@ public Map<String, Object> getMethodAsMap(KNNMethodContext knnMethodContext) {
public int estimateOverheadInKB(KNNMethodContext knnMethodContext, int dimension) {
return knnLibrary.estimateOverheadInKB(knnMethodContext, dimension);
}

@Override
public Boolean isInitialized() {
return knnLibrary.isInitialized();
}

@Override
public void setInitialized(Boolean isInitialized) {
knnLibrary.setInitialized(isInitialized);
}
}
27 changes: 27 additions & 0 deletions src/main/java/org/opensearch/knn/index/util/KNNLibrary.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Function;

import static org.opensearch.knn.common.KNNConstants.BYTES_PER_KILOBYTES;
Expand Down Expand Up @@ -138,6 +139,20 @@ public interface KNNLibrary {
*/
Map<String, Object> getMethodAsMap(KNNMethodContext knnMethodContext);

/**
* Getter for initialized
*
* @return whether library has been initialized
*/
Boolean isInitialized();

/**
* Set initialized to true
*
* @param isInitialized whether library has been initialized
*/
void setInitialized(Boolean isInitialized);

/**
* Abstract implementation of KNNLibrary. It contains several default methods and fields that
* are common across different underlying libraries.
Expand All @@ -148,6 +163,7 @@ abstract class NativeLibrary implements KNNLibrary {
private String latestLibraryBuildVersion;
private String latestLibraryVersion;
private String extension;
private AtomicBoolean initialized;

/**
* Constructor for NativeLibrary
Expand All @@ -166,6 +182,7 @@ public NativeLibrary(Map<String, KNNMethod> methods, Map<SpaceType, Function<Flo
this.latestLibraryBuildVersion = latestLibraryBuildVersion;
this.latestLibraryVersion = latestLibraryVersion;
this.extension = extension;
this.initialized = new AtomicBoolean(false);
}

@Override
Expand Down Expand Up @@ -235,6 +252,16 @@ public Map<String, Object> getMethodAsMap(KNNMethodContext knnMethodContext) {

return knnMethod.getAsMap(knnMethodContext);
}

@Override
public Boolean isInitialized() {
return initialized.get();
}

@Override
public void setInitialized(Boolean isInitialized) {
initialized.set(isInitialized);
}
}

/**
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/opensearch/knn/jni/FaissService.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import org.opensearch.knn.common.KNNConstants;
import org.opensearch.knn.index.KNNQueryResult;
import org.opensearch.knn.index.util.KNNEngine;

import java.security.AccessController;
import java.security.PrivilegedAction;
Expand All @@ -32,6 +33,7 @@ class FaissService {
AccessController.doPrivileged((PrivilegedAction<Void>) () -> {
System.loadLibrary(KNNConstants.FAISS_JNI_LIBRARY_NAME);
initLibrary();
KNNEngine.FAISS.setInitialized(true);
return null;
});
}
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/opensearch/knn/jni/NmslibService.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import org.opensearch.knn.common.KNNConstants;
import org.opensearch.knn.index.KNNQueryResult;
import org.opensearch.knn.index.util.KNNEngine;

import java.security.AccessController;
import java.security.PrivilegedAction;
Expand All @@ -32,6 +33,7 @@ class NmslibService {
AccessController.doPrivileged((PrivilegedAction<Void>) () -> {
System.loadLibrary(KNNConstants.NMSLIB_JNI_LIBRARY_NAME);
initLibrary();
KNNEngine.NMSLIB.setInitialized(true);
return null;
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ public enum KNNCounter {
SCRIPT_COMPILATIONS("script_compilations"),
SCRIPT_COMPILATION_ERRORS("script_compilation_errors"),
SCRIPT_QUERY_REQUESTS("script_query_requests"),
SCRIPT_QUERY_ERRORS("script_query_errors");
SCRIPT_QUERY_ERRORS("script_query_errors"),
TRAINING_REQUESTS("training_requests"),
TRAINING_ERRORS("training_errors");

private String name;
private AtomicLong count;
Expand Down
18 changes: 16 additions & 2 deletions src/main/java/org/opensearch/knn/plugin/stats/KNNStatsConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@
import com.google.common.collect.ImmutableMap;
import org.opensearch.knn.common.KNNConstants;
import org.opensearch.knn.index.memory.NativeMemoryCacheManager;
import org.opensearch.knn.index.util.KNNEngine;
import org.opensearch.knn.indices.ModelCache;
import org.opensearch.knn.indices.ModelDao;
import org.opensearch.knn.plugin.stats.suppliers.LibraryInitializedSupplier;
import org.opensearch.knn.plugin.stats.suppliers.EventOccurredWithinThresholdSupplier;
import org.opensearch.knn.plugin.stats.suppliers.KNNCircuitBreakerSupplier;
import org.opensearch.knn.plugin.stats.suppliers.KNNCounterSupplier;
Expand All @@ -57,9 +59,9 @@ public class KNNStatsConfig {
.put(StatNames.EVICTION_COUNT.getName(), new KNNStat<>(false,
new KNNInnerCacheStatsSupplier(CacheStats::evictionCount)))
.put(StatNames.GRAPH_MEMORY_USAGE.getName(), new KNNStat<>(false,
new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getCacheSizeInKilobytes)))
new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getIndicesSizeInKilobytes)))
.put(StatNames.GRAPH_MEMORY_USAGE_PERCENTAGE.getName(), new KNNStat<>(false,
new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getCacheSizeAsPercentage)))
new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getIndicesSizeAsPercentage)))
.put(StatNames.INDICES_IN_CACHE.getName(), new KNNStat<>(false,
new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getIndicesCacheStats)))
.put(StatNames.CACHE_CAPACITY_REACHED.getName(), new KNNStat<>(false,
Expand Down Expand Up @@ -91,5 +93,17 @@ public class KNNStatsConfig {
new ModelIndexingDegradingSupplier(ModelCache::getEvictedDueToSizeAt),
KNNConstants.MODEL_CACHE_CAPACITY_ATROPHY_THRESHOLD_IN_MINUTES,
ChronoUnit.MINUTES)))
.put(StatNames.FAISS_LOADED.getName(), new KNNStat<>(false,
new LibraryInitializedSupplier(KNNEngine.FAISS)))
.put(StatNames.NMSLIB_LOADED.getName(), new KNNStat<>(false,
new LibraryInitializedSupplier(KNNEngine.NMSLIB)))
.put(StatNames.TRAINING_REQUESTS.getName(), new KNNStat<>(false,
new KNNCounterSupplier(KNNCounter.TRAINING_REQUESTS)))
.put(StatNames.TRAINING_ERRORS.getName(), new KNNStat<>(false,
new KNNCounterSupplier(KNNCounter.TRAINING_ERRORS)))
.put(StatNames.TRAINING_MEMORY_USAGE.getName(), new KNNStat<>(false,
new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getTrainingSizeInKilobytes)))
.put(StatNames.TRAINING_MEMORY_USAGE_PERCENTAGE.getName(), new KNNStat<>(false,
new NativeMemoryCacheManagerSupplier<>(NativeMemoryCacheManager::getTrainingSizeAsPercentage)))
.build();
}
6 changes: 6 additions & 0 deletions src/main/java/org/opensearch/knn/plugin/stats/StatNames.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ public enum StatNames {
INDICES_IN_CACHE("indices_in_cache"),
CIRCUIT_BREAKER_TRIGGERED("circuit_breaker_triggered"),
MODEL_INDEX_STATUS("model_index_status"),
FAISS_LOADED("faiss_initialized"),
NMSLIB_LOADED("nmslib_initialized"),
INDEXING_FROM_MODEL_DEGRADED("indexing_from_model_degraded"),
GRAPH_QUERY_ERRORS(KNNCounter.GRAPH_QUERY_ERRORS.getName()),
GRAPH_QUERY_REQUESTS(KNNCounter.GRAPH_QUERY_REQUESTS.getName()),
Expand All @@ -53,6 +55,10 @@ public enum StatNames {
SCRIPT_COMPILATIONS(KNNCounter.SCRIPT_COMPILATIONS.getName()),
SCRIPT_COMPILATION_ERRORS(KNNCounter.SCRIPT_COMPILATION_ERRORS.getName()),
SCRIPT_QUERY_REQUESTS(KNNCounter.SCRIPT_QUERY_REQUESTS.getName()),
TRAINING_REQUESTS(KNNCounter.TRAINING_REQUESTS.getName()),
TRAINING_ERRORS(KNNCounter.TRAINING_ERRORS.getName()),
TRAINING_MEMORY_USAGE("training_memory_usage"),
TRAINING_MEMORY_USAGE_PERCENTAGE("training_memory_usage_percentage"),
SCRIPT_QUERY_ERRORS(KNNCounter.SCRIPT_QUERY_ERRORS.getName());

private String name;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/

package org.opensearch.knn.plugin.stats.suppliers;

import org.opensearch.knn.index.util.KNNLibrary;

import java.util.function.Supplier;

/**
* Supplier to determine whether library has been initialized
*/
public class LibraryInitializedSupplier implements Supplier<Boolean> {
private KNNLibrary knnLibrary;

/**
* Constructor
*
* @param knnLibrary to check if initialized
*/
public LibraryInitializedSupplier(KNNLibrary knnLibrary) {
this.knnLibrary = knnLibrary;
}

@Override
public Boolean get() {
return knnLibrary.isInitialized();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.opensearch.knn.index.memory.NativeMemoryCacheManager;
import org.opensearch.knn.index.memory.NativeMemoryEntryContext;
import org.opensearch.knn.index.memory.NativeMemoryLoadStrategy;
import org.opensearch.knn.plugin.stats.KNNCounter;
import org.opensearch.knn.training.TrainingJob;
import org.opensearch.knn.training.TrainingJobRunner;
import org.opensearch.tasks.Task;
Expand All @@ -44,6 +45,7 @@ public TrainingModelTransportAction(TransportService transportService,
@Override
protected void doExecute(Task task, TrainingModelRequest request,
ActionListener<TrainingModelResponse> listener) {

NativeMemoryEntryContext.TrainingDataEntryContext trainingDataEntryContext =
new NativeMemoryEntryContext.TrainingDataEntryContext(
request.getTrainingDataSizeInKB(),
Expand Down Expand Up @@ -72,13 +74,19 @@ protected void doExecute(Task task, TrainingModelRequest request,
request.getDescription()
);

KNNCounter.TRAINING_REQUESTS.increment();
ActionListener<TrainingModelResponse> wrappedListener = ActionListener.wrap(listener::onResponse, ex -> {
KNNCounter.TRAINING_ERRORS.increment();
listener.onFailure(ex);
});

try {
TrainingJobRunner.getInstance().execute(trainingJob, ActionListener.wrap(
indexResponse -> listener.onResponse(new TrainingModelResponse(indexResponse.getId())),
listener::onFailure)
indexResponse -> wrappedListener.onResponse(new TrainingModelResponse(indexResponse.getId())),
wrappedListener::onFailure)
);
} catch (IOException e) {
listener.onFailure(e);
wrappedListener.onFailure(e);
}
}
}
Loading

0 comments on commit bbfb373

Please sign in to comment.