From 4de9f83cbc35ca9a21bc7326c3254a4b40b233ec Mon Sep 17 00:00:00 2001 From: Martin Gaievski Date: Wed, 25 Jan 2023 10:25:36 -0800 Subject: [PATCH] Adding p99.9, p100 and num_of_segments metrics to perf-tool (#739) * Adding p99.9, p100 and num_of_segments metrics to perf-tool Signed-off-by: Martin Gaievski --- benchmarks/perf-tool/README.md | 27 +++++++++--- .../perf-tool/okpt/test/steps/factory.py | 5 ++- benchmarks/perf-tool/okpt/test/steps/steps.py | 41 ++++++++++++++++++- benchmarks/perf-tool/okpt/test/test.py | 10 ++++- 4 files changed, 75 insertions(+), 8 deletions(-) diff --git a/benchmarks/perf-tool/README.md b/benchmarks/perf-tool/README.md index d4404f551..9c1c18918 100644 --- a/benchmarks/perf-tool/README.md +++ b/benchmarks/perf-tool/README.md @@ -272,12 +272,12 @@ Runs a set of queries against an index. ##### Metrics -| Metric Name | Description | Unit | -| ----------- | ----------- | ----------- | -| took | Took times returned per query aggregated as total, p50, p90 and p99 (when applicable) | ms | -| memory_kb | Native memory k-NN is using at the end of the query workload | KB | +| Metric Name | Description | Unit | +| ----------- |---------------------------------------------------------------------------------------------------------| ----------- | +| took | Took times returned per query aggregated as total, p50, p90, p99, p99.9 and p100 (when applicable) | ms | +| memory_kb | Native memory k-NN is using at the end of the query workload | KB | | recall@R | ratio of top R results from the ground truth neighbors that are in the K results returned by the plugin | float 0.0-1.0 | -| recall@K | ratio of results returned that were ground truth nearest neighbors | float 0.0-1.0 | +| recall@K | ratio of results returned that were ground truth nearest neighbors | float 0.0-1.0 | #### query_with_filter @@ -311,6 +311,23 @@ Runs a set of queries with filter against an index. | recall@R | ratio of top R results from the ground truth neighbors that are in the K results returned by the plugin | float 0.0-1.0 | | recall@K | ratio of results returned that were ground truth nearest neighbors | float 0.0-1.0 | +#### get_stats + +Gets the index stats. + +##### Parameters + +| Parameter Name | Description | Default | +| ----------- |-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------| +| index_name | Name of index to search | No default | + +##### Metrics + +| Metric Name | Description | Unit | +| ----------- |-------------------------------------------------|------------| +| num_of_committed_segments | Total number of commited segments in the index | integer >= 0 | +| num_of_search_segments | Total number of search segments in the index | integer >= 0 | + ### Data sets This benchmark tool uses pre-generated data sets to run indexing and query workload. For some benchmark types existing dataset need to be diff --git a/benchmarks/perf-tool/okpt/test/steps/factory.py b/benchmarks/perf-tool/okpt/test/steps/factory.py index 2e53b4d4d..ba0fc5b60 100644 --- a/benchmarks/perf-tool/okpt/test/steps/factory.py +++ b/benchmarks/perf-tool/okpt/test/steps/factory.py @@ -9,7 +9,8 @@ from okpt.test.steps.base import Step, StepConfig from okpt.test.steps.steps import CreateIndexStep, DisableRefreshStep, RefreshIndexStep, DeleteIndexStep, \ - TrainModelStep, DeleteModelStep, ForceMergeStep, ClearCacheStep, IngestStep, IngestMultiFieldStep, QueryStep, QueryWithFilterStep + TrainModelStep, DeleteModelStep, ForceMergeStep, ClearCacheStep, IngestStep, IngestMultiFieldStep, \ + QueryStep, QueryWithFilterStep, GetStatsStep def create_step(step_config: StepConfig) -> Step: @@ -37,5 +38,7 @@ def create_step(step_config: StepConfig) -> Step: return ForceMergeStep(step_config) elif step_config.step_name == ClearCacheStep.label: return ClearCacheStep(step_config) + elif step_config.step_name == GetStatsStep.label: + return GetStatsStep(step_config) raise ConfigurationError(f'Invalid step {step_config.step_name}') diff --git a/benchmarks/perf-tool/okpt/test/steps/steps.py b/benchmarks/perf-tool/okpt/test/steps/steps.py index bc43bf195..0de61078f 100644 --- a/benchmarks/perf-tool/okpt/test/steps/steps.py +++ b/benchmarks/perf-tool/okpt/test/steps/steps.py @@ -454,7 +454,8 @@ def _action(self): results['took'] = [ float(query_response['took']) for query_response in query_responses ] - results['memory_kb'] = get_cache_size_in_kb(self.endpoint, 80) + port = 9200 if self.endpoint == 'localhost' else 80 + results['memory_kb'] = get_cache_size_in_kb(self.endpoint, port) if self.calculate_recall: ids = [[int(hit['_id']) @@ -588,6 +589,41 @@ def get_body(self, vec): else: raise ConfigurationError('Not supported filter type {}'.format(self.filter_type)) +class GetStatsStep(OpenSearchStep): + """See base class.""" + + label = 'get_stats' + + def __init__(self, step_config: StepConfig): + super().__init__(step_config) + + self.index_name = parse_string_param('index_name', step_config.config, + {}, None) + + def _action(self): + """Get stats for cluster/index etc. + + Returns: + Stats with following info: + - number of committed and search segments in the index + """ + results = {} + segment_stats = get_segment_stats(self.opensearch, self.index_name) + shards = segment_stats["indices"][self.index_name]["shards"] + num_of_committed_segments = 0 + num_of_search_segments = 0; + for shard_key in shards.keys(): + for segment in shards[shard_key]: + + num_of_committed_segments += segment["num_committed_segments"] + num_of_search_segments += segment["num_search_segments"] + + results['committed_segments'] = num_of_committed_segments + results['search_segments'] = num_of_search_segments + return results + + def _get_measures(self) -> List[str]: + return ['committed_segments', 'search_segments'] # Helper functions - (AKA not steps) def bulk_transform(partition: np.ndarray, field_name: str, action, @@ -755,3 +791,6 @@ def query_index(opensearch: OpenSearch, index_name: str, body: dict, def bulk_index(opensearch: OpenSearch, index_name: str, body: List): return opensearch.bulk(index=index_name, body=body, timeout='5m') + +def get_segment_stats(opensearch: OpenSearch, index_name: str): + return opensearch.indices.segments(index=index_name) diff --git a/benchmarks/perf-tool/okpt/test/test.py b/benchmarks/perf-tool/okpt/test/test.py index dbd65d053..c947545ad 100644 --- a/benchmarks/perf-tool/okpt/test/test.py +++ b/benchmarks/perf-tool/okpt/test/test.py @@ -53,6 +53,8 @@ def _pxx(values: List[Any], p: float): if p < 0 or p > 1: return -1.0 elif p < lowest_percentile or p > highest_percentile: + if p == 1.0 and len(values) > 1: + return float(values[len(values) - 1]) return -1.0 else: return float(values[floor(len(values) * p)]) @@ -105,7 +107,7 @@ def _aggregate_steps(step_results: List[Dict[str, Any]], for measure_label in step_measure_labels: step_measure = step[measure_label] - step_measure_label = f'{step_label}_{measure_label}' + step_measure_label = f'{measure_label}' if step_label == 'get_stats' else f'{step_label}_{measure_label}' # Add cumulative test measures from steps to test measures if measure_label in measure_labels: @@ -137,6 +139,12 @@ def _aggregate_steps(step_results: List[Dict[str, Any]], p99 = _pxx(step_measure, 0.99) if p99 != -1: aggregate[step_measure_label + '_p99'] = p99 + p99_9 = _pxx(step_measure, 0.999) + if p99_9 != -1: + aggregate[step_measure_label + '_p99.9'] = p99_9 + p100 = _pxx(step_measure, 1.00) + if p100 != -1: + aggregate[step_measure_label + '_p100'] = p100 return aggregate