From 0ae7ea2b943200c75970f146cd297afdb8990489 Mon Sep 17 00:00:00 2001
From: Daniel Mitterdorfer
Date: Tue, 29 Jan 2019 14:15:14 +0100
Subject: [PATCH] Ignore JSON logs for merge parts analysis

With this commit we ignore any JSON log files when analyzing merge
parts. Because Elasticsearch writes both plain text and JSON log files
with elastic/elasticsearch#36833, we need to ignore one of them in order
to avoid double-counting. We choose to ignore the JSON files for now as
this is backwards-compatible.
---
 esrally/mechanic/telemetry.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/esrally/mechanic/telemetry.py b/esrally/mechanic/telemetry.py
index 35ad1d964..81d8180e5 100644
--- a/esrally/mechanic/telemetry.py
+++ b/esrally/mechanic/telemetry.py
@@ -656,12 +656,14 @@ def on_benchmark_stop(self):
         merge_times = {}
         for log_file in os.listdir(self.node_log_dir):
             log_path = "%s/%s" % (self.node_log_dir, log_file)
-            if not io.is_archive(log_file):
+            if io.is_archive(log_file):
+                self.logger.debug("Skipping archived logs in [%s].", log_path)
+            elif io.has_extension(log_file, ".json"):
+                self.logger.debug("Skipping JSON-formatted logs in [%s].", log_path)
+            else:
                 self.logger.debug("Analyzing merge times in [%s]", log_path)
                 with open(log_path, mode="rt", encoding="utf-8") as f:
                     self._extract_merge_times(f, merge_times)
-            else:
-                self.logger.debug("Skipping archived logs in [%s].", log_path)
         if merge_times:
             self._store_merge_times(merge_times)
             self.logger.info("Finished analyzing merge times. Extracted [%s] different merge time components.", len(merge_times))