From caf108ee97a14b35884a30c62dfea3f3f3139f78 Mon Sep 17 00:00:00 2001 From: Jason Bryan Date: Thu, 29 Aug 2024 11:03:50 -0400 Subject: [PATCH 1/9] add logsdb to http_logs: * add new parameters for logsdb and data streams * remove index.json * add a composable template supporting standard indices, data streams, and logs data streams * Update README --- http_logs/README.md | 2 + http_logs/index-template.json | 67 ++++++++++++++++++++++++++ http_logs/index.json | 55 ---------------------- http_logs/track.json | 89 ++++++++++++++++++----------------- 4 files changed, 116 insertions(+), 97 deletions(-) create mode 100644 http_logs/index-template.json delete mode 100644 http_logs/index.json diff --git a/http_logs/README.md b/http_logs/README.md index 001f394d..8a7e861d 100644 --- a/http_logs/README.md +++ b/http_logs/README.md @@ -42,6 +42,8 @@ This track allows to overwrite the following parameters with Rally 0.8.0+ using * `number_of_shards` (default: 5) * `source_enabled` (default: true): A boolean defining whether the `_source` field is stored in the index. * `index_settings`: A list of index settings. Index settings defined elsewhere (e.g. `number_of_replicas`) need to be overridden explicitly. +* `index_mode` (default: unset): Set to `logsdb` to enable indexing to [logs data streams](https://www.elastic.co/guide/en/elasticsearch/reference/master/logs-data-stream.html). If not enabled, Rally will not use logs data streams. +* `index_type` (default: unset): Set to `data_stream` to enable indexing to data streams. `index_type` is not required when `index_mode` is set to `logsdb`. * `cluster_health` (default: "green"): The minimum required cluster health. * `ingest_pipeline`: Only applicable for `--challenge=append-index-only-with-ingest-pipeline`, selects which ingest node pipeline to run. Valid options are `'baseline'` (default), `'grok'` and `'geoip'`. For example: `--challenge=append-index-only-with-ingest-pipeline --track-params="ingest_pipeline:'baseline'" ` diff --git a/http_logs/index-template.json b/http_logs/index-template.json new file mode 100644 index 00000000..d92c2f6b --- /dev/null +++ b/http_logs/index-template.json @@ -0,0 +1,67 @@ +{ + "priority": 101, + "index_patterns": ["logs-*", "reindexed-logs"], + {%- if index_mode == "logsdb" or index_type == "data_stream" %} + "data_stream": {}, + {%- endif %} + "template": { + "settings": { + {%- if index_mode == "logsdb" %} + "index.mode": "logsdb", + {%- endif %} + {# non-serverless-index-settings-marker-start #}{%- if build_flavor != "serverless" or serverless_operator == true -%} + "index.number_of_shards": {{ number_of_shards | default(5) }}, + "index.number_of_replicas": {{ number_of_replicas | default(0) }}, + "index.requests.cache.enable": false + {%- endif -%}{# non-serverless-index-settings-marker-end #} + }, + "mappings": { + "dynamic": "strict", + {%- if index_mode != "logsdb" %} + "_source": { + "enabled": {{ source_enabled | default(true) | tojson }} + }, + {%- endif %} + "properties": { + "@timestamp": { + {%- if ingest_pipeline is defined and ingest_pipeline == "grok" %} + "format": "strict_date_optional_time", + {%- else %} + "format": "epoch_second", + {%- endif %} + "type": "date" + }, + "message": { + "type": "keyword", + "index": false, + "doc_values": false + }, + "clientip": { + "type": "ip" + }, + "request": { + "type": "match_only_text", + "fields": { + "raw": { + "ignore_above": 256, + "type": "keyword" + } + } + }, + "status": { + "type": "integer" + }, + "size": { + "type": "integer" + }, + "geoip" : { + "properties" : { + "country_name": { "type": "keyword" }, + "city_name": { "type": "keyword" }, + "location" : { "type" : "geo_point" } + } + } + } + } + } +} diff --git a/http_logs/index.json b/http_logs/index.json deleted file mode 100644 index c9389c1d..00000000 --- a/http_logs/index.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "settings": { - {# non-serverless-index-settings-marker-start #}{%- if build_flavor != "serverless" or serverless_operator == true -%} - "index.number_of_shards": {{ number_of_shards | default(5) }}, - "index.number_of_replicas": {{ number_of_replicas | default(0) }}, - "index.requests.cache.enable": false - {%- endif -%}{# non-serverless-index-settings-marker-end #} - }, - "mappings": { - "dynamic": "strict", - "_source": { - "enabled": {{ source_enabled | default(true) | tojson }} - }, - "properties": { - "@timestamp": { - {%- if ingest_pipeline is defined and ingest_pipeline == "grok" %} - "format": "strict_date_optional_time", - {%- else %} - "format": "epoch_second", - {%- endif %} - "type": "date" - }, - "message": { - "type": "keyword", - "index": false, - "doc_values": false - }, - "clientip": { - "type": "ip" - }, - "request": { - "type": "match_only_text", - "fields": { - "raw": { - "ignore_above": 256, - "type": "keyword" - } - } - }, - "status": { - "type": "integer" - }, - "size": { - "type": "integer" - }, - "geoip" : { - "properties" : { - "country_name": { "type": "keyword" }, - "city_name": { "type": "keyword" }, - "location" : { "type" : "geo_point" } - } - } - } - } -} diff --git a/http_logs/track.json b/http_logs/track.json index 63cea73f..5db1bdb9 100644 --- a/http_logs/track.json +++ b/http_logs/track.json @@ -1,53 +1,58 @@ {% import "rally.helpers" as rally with context %} - - {%- if runtime_fields is defined %} - {% set index_body = 'index-runtime-fields.json' %} - {% set query_range_ts_start = "1998-05-01T00:00:00Z" %} - {% set query_range_ts_end = "1998-05-02T00:00:00Z" %} - {% set search_after_ts = "1998-06-10" %} + {%- set index_body = 'index-runtime-fields.json' %} + {%- set query_range_ts_start = "1998-05-01T00:00:00Z" %} + {%- set query_range_ts_end = "1998-05-02T00:00:00Z" %} + {%- set search_after_ts = "1998-06-10" %} +{% else %} + {%- set index_body = 'index.json' %} + {%- set query_range_ts_start = "893980800" %} + {%- set query_range_ts_end = "894067200" %} + {%- set search_after_ts = "897436800" %} +{%- endif -%} +{%- if index_mode == "logsdb" or index_type == "data_stream" %} + {%- set target_type = "data-streams" | tojson %} + {%- set target_property = "target-data-stream" | tojson %} {%- else %} - {% set index_body = 'index.json' %} - {% set query_range_ts_start = "893980800" %} - {% set query_range_ts_end = "894067200" %} - {% set search_after_ts = "897436800" %} -{%- endif %} + {%- set target_type = "indices" | tojson %} + {%- set target_property = "target-index" | tojson %} +{%- endif -%} { "version": 2, "description": "HTTP server log data", "#TODO": "Replace index definitions with a template after setting the track version to 2. Explicit index definitions are not necessary anymore.", - "indices": [ + "composable-templates": [ { - "name": "logs-181998", - "body": "{{ index_body }}" + "name": "rally-http_logs", + "index-pattern": "logs-*", + "delete-matching-indices": true, + "template": "index-template.json" + } + ], + {{ target_type }}: [ + { + "name": "logs-181998" }, { - "name": "logs-191998", - "body": "{{ index_body }}" + "name": "logs-191998" }, { - "name": "logs-201998", - "body": "{{ index_body }}" + "name": "logs-201998" }, { - "name": "logs-211998", - "body": "{{ index_body }}" + "name": "logs-211998" }, { - "name": "logs-221998", - "body": "{{ index_body }}" + "name": "logs-221998" }, { - "name": "logs-231998", - "body": "{{ index_body }}" + "name": "logs-231998" }, { - "name": "logs-241998", - "body": "{{ index_body }}" + "name": "logs-241998" }, { - "name": "reindexed-logs", - "body": "{{ index_body }}" + "name": "reindexed-logs" } ], "corpora": [ @@ -57,49 +62,49 @@ "base-url": "https://rally-tracks.elastic.co/http_logs", "documents": [ { - "target-index": "logs-181998", + {{ target_property }}: "logs-181998", "source-file": "documents-181998.unparsed.json.bz2", "document-count": 2708746, "compressed-bytes": 13088137, "uncompressed-bytes": 303920342 }, { - "target-index": "logs-191998", + {{ target_property }}: "logs-191998", "source-file": "documents-191998.unparsed.json.bz2", "document-count": 9697882, "compressed-bytes": 47290776, "uncompressed-bytes": 1088378738 }, { - "target-index": "logs-201998", + {{ target_property }}: "logs-201998", "source-file": "documents-201998.unparsed.json.bz2", "document-count": 13053463, "compressed-bytes": 63278452, "uncompressed-bytes": 1456836090 }, { - "target-index": "logs-211998", + {{ target_property }}: "logs-211998", "source-file": "documents-211998.unparsed.json.bz2", "document-count": 17647279, "compressed-bytes": 85739523, "uncompressed-bytes": 1975990671 }, { - "target-index": "logs-221998", + {{ target_property }}: "logs-221998", "source-file": "documents-221998.unparsed.json.bz2", "document-count": 10716760, "compressed-bytes": 53264421, "uncompressed-bytes": 1202551382 }, { - "target-index": "logs-231998", + {{ target_property }}: "logs-231998", "source-file": "documents-231998.unparsed.json.bz2", "document-count": 11961342, "compressed-bytes": 60795929, "uncompressed-bytes": 1334381144 }, { - "target-index": "logs-241998", + {{ target_property }}: "logs-241998", "source-file": "documents-241998.unparsed.json.bz2", "document-count": 181463624, "compressed-bytes": 899190175, @@ -113,49 +118,49 @@ "base-url": "https://rally-tracks.elastic.co/http_logs", "documents": [ { - "target-index": "logs-181998", + {{ target_property }}: "logs-181998", "source-file": "documents-181998.json.bz2", "document-count": 2708746, "compressed-bytes": 13843641, "uncompressed-bytes": 363512754 }, { - "target-index": "logs-191998", + {{ target_property }}: "logs-191998", "source-file": "documents-191998.json.bz2", "document-count": 9697882, "compressed-bytes": 49546887, "uncompressed-bytes": 1301732149 }, { - "target-index": "logs-201998", + {{ target_property }}: "logs-201998", "source-file": "documents-201998.json.bz2", "document-count": 13053463, "compressed-bytes": 65759419, "uncompressed-bytes": 1744012279 }, { - "target-index": "logs-211998", + {{ target_property }}: "logs-211998", "source-file": "documents-211998.json.bz2", "document-count": 17647279, "compressed-bytes": 88445049, "uncompressed-bytes": 2364230815 }, { - "target-index": "logs-221998", + {{ target_property }}: "logs-221998", "source-file": "documents-221998.json.bz2", "document-count": 10716760, "compressed-bytes": 54274027, "uncompressed-bytes": 1438320123 }, { - "target-index": "logs-231998", + {{ target_property }}: "logs-231998", "source-file": "documents-231998.json.bz2", "document-count": 11961342, "compressed-bytes": 61043842, "uncompressed-bytes": 1597530673 }, { - "target-index": "logs-241998", + {{ target_property }}: "logs-241998", "source-file": "documents-241998.json.bz2", "document-count": 181463624, "compressed-bytes": 907295259, From f8cf526671b6045b1777e886bd89fc386dfa7ce2 Mon Sep 17 00:00:00 2001 From: Jason Bryan Date: Thu, 29 Aug 2024 11:04:38 -0400 Subject: [PATCH 2/9] logsdb for http_logs: update default challenge schedule --- .../challenges/common/default-schedule.json | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/http_logs/challenges/common/default-schedule.json b/http_logs/challenges/common/default-schedule.json index 91ec7e00..c19b7cb3 100644 --- a/http_logs/challenges/common/default-schedule.json +++ b/http_logs/challenges/common/default-schedule.json @@ -1,9 +1,37 @@ { - "operation": "delete-index" + "operation": { + "name": "delete-data-stream", + "operation-type": "delete-data-stream", + "only-if-exists": false, + "data-stream": ["logs-181998", "logs-191998", "logs-201998", "logs-211998", "logs-221998", "logs-231998", "logs-241998", "reindexed-logs"] + } + }, + { + "operation": { + "name": "delete-index", + "operation-type": "delete-index", + "only-if-exists": false, + "index": ["logs-181998", "logs-191998", "logs-201998", "logs-211998", "logs-221998", "logs-231998", "logs-241998", "reindexed-logs"] + } + }, + { + "operation" : { + "name": "delete-all-index-templates", + "operation-type": "delete-composable-template" + } + }, + { + "operation": { + "name": "create-all-templates", + "operation-type": "create-composable-template" + } }, { + {%- if index_mode == "logsdb" or index_type == "data_stream" %} + {%- set indexing_operation_type = "create-data-stream" %} + {%- endif %} "operation": { - "operation-type": "create-index", + "operation-type": {{ indexing_operation_type | default("create-index") | tojson }}, "settings": {{index_settings | default({}) | tojson}} } }, From 30813ed335c25468de68159ee0a62404f018844f Mon Sep 17 00:00:00 2001 From: Jason Bryan Date: Fri, 30 Aug 2024 14:49:04 -0400 Subject: [PATCH 3/9] consolidate index templates --- http_logs/index-runtime-fields.json | 102 ---------------------------- http_logs/index-template.json | 101 +++++++++++++++++++++++++-- 2 files changed, 95 insertions(+), 108 deletions(-) delete mode 100644 http_logs/index-runtime-fields.json diff --git a/http_logs/index-runtime-fields.json b/http_logs/index-runtime-fields.json deleted file mode 100644 index d1bdcf04..00000000 --- a/http_logs/index-runtime-fields.json +++ /dev/null @@ -1,102 +0,0 @@ -{ - "settings": { - {# non-serverless-index-settings-marker-start #}{%- if build_flavor != "serverless" or serverless_operator == true -%} - "index.number_of_shards": {{ number_of_shards | default(5) }}, - "index.number_of_replicas": {{ number_of_replicas | default(0) }}, - "index.requests.cache.enable": false - {%- endif -%}{# non-serverless-index-settings-marker-end #} - }, - "mappings": { - "dynamic": "strict", - "_source": { - "enabled": {{ source_enabled | default(true) | tojson }} - }, - "properties": { - "@timestamp": { - "format": "strict_date_optional_time", - "type": "date" - }, - "message": { - "type": "wildcard", - "fields": { - "keyword": { - "type": "keyword" - } - } - } - }, - "runtime": { - {%- set sources = [('source', 'message.source'), ('wildcard', 'message'), ('keyword', 'message.keyword')] %} - {%- for source_type, field in sources %} - "grok.from_{{source_type}}.clientip": { - "type": "ip", - "script": "emit(grok('%{IPORHOST:clientip} %{HTTPDUSER:httpduser} %{USER:user} \\\\[%{TIMESTAMP_ISO8601:timestamp}\\\\] \"(?:%{WORD:verb} %{NOTSPACE:path}(?: HTTP/%{NOTSPACE:version})?|%{DATA})\" %{NUMBER:status:int} (?:%{NUMBER:size:long}|-)').extract(doc['{{field}}'].value).clientip)" - }, - "grok.from_{{source_type}}.path": { - "type": "keyword", - "script": "emit(grok('%{IPORHOST:clientip} %{HTTPDUSER:httpduser} %{USER:user} \\\\[%{TIMESTAMP_ISO8601:timestamp}\\\\] \"(?:%{WORD:verb} %{NOTSPACE:path}(?: HTTP/%{NOTSPACE:version})?|%{DATA})\" %{NUMBER:status:int} (?:%{NUMBER:size:long}|-)').extract(doc['{{field}}'].value).path)" - }, - "grok.from_{{source_type}}.status": { - "type": "long", - "script": "emit(grok('%{IPORHOST:clientip} %{HTTPDUSER:httpduser} %{USER:user} \\\\[%{TIMESTAMP_ISO8601:timestamp}\\\\] \"(?:%{WORD:verb} %{NOTSPACE:path}(?: HTTP/%{NOTSPACE:version})?|%{DATA})\" %{NUMBER:status:int} (?:%{NUMBER:size:long}|-)').extract(doc['{{field}}'].value).status)" - }, - "grok.from_{{source_type}}.size": { - "type": "long", - "script": "emit(grok('%{IPORHOST:clientip} %{HTTPDUSER:httpduser} %{USER:user} \\\\[%{TIMESTAMP_ISO8601:timestamp}\\\\] \"(?:%{WORD:verb} %{NOTSPACE:path}(?: HTTP/%{NOTSPACE:version})?|%{DATA})\" %{NUMBER:status:int} (?:%{NUMBER:size:long}|-)').extract(doc['{{field}}'].value).size)" - }, - "grok.from_{{source_type}}.timestamp": { - "type": "date", - "script": "emit(parse(grok('%{IPORHOST:clientip} %{HTTPDUSER:httpduser} %{USER:user} \\\\[%{TIMESTAMP_ISO8601:timestamp}\\\\] \"(?:%{WORD:verb} %{NOTSPACE:path}(?: HTTP/%{NOTSPACE:version})?|%{DATA})\" %{NUMBER:status:int} (?:%{NUMBER:size:long}|-)').extract(doc['{{field}}'].value).timestamp))" - }, - {%- endfor %} - {%- for source_type, field in sources %} - "dissect.from_{{source_type}}.clientip": { - "type": "ip", - "script": "emit(dissect('%{clientip} %{httpduser} %{user} [%{timestamp}] \"%{verb} %{path} HTTP/%{version}\" %{status} %{size}').extract(doc['{{field}}'].value).clientip)" - }, - "dissect.from_{{source_type}}.path": { - "type": "keyword", - "script": "emit(dissect('%{clientip} %{httpduser} %{user} [%{timestamp}] \"%{verb} %{path} HTTP/%{version}\" %{status} %{size}').extract(doc['{{field}}'].value).path)" - }, - "dissect.from_{{source_type}}.status": { - "type": "long", - "script": "emit(Integer.parseInt(dissect('%{clientip} %{httpduser} %{user} [%{timestamp}] \"%{verb} %{path} HTTP/%{version}\" %{status} %{size}').extract(doc['{{field}}'].value).status))" - }, - "dissect.from_{{source_type}}.size": { - "type": "long", - "script": "emit(Long.parseLong(dissect('%{clientip} %{httpduser} %{user} [%{timestamp}] \"%{verb} %{path} HTTP/%{version}\" %{status} %{size}').extract(doc['{{field}}'].value).size))" - }, - "dissect.from_{{source_type}}.timestamp": { - "type": "date", - "script": "emit(parse(dissect('%{clientip} %{httpduser} %{user} [%{timestamp}] \"%{verb} %{path} HTTP/%{version}\" %{status} %{size}').extract(doc['{{field}}'].value).status))" - }, - {%- endfor %} - {%- for source_type, field in sources %} - "index_of.from_{{source_type}}.clientip": { - "type": "ip", - "script": "String m = doc[\"{{field}}\"].value; int end = m.indexOf(\" \"); emit(m.substring(0, end));" - }, - "index_of.from_{{source_type}}.path": { - "type": "keyword", - "script": "String m = doc[\"{{field}}\"].value; int start = m.indexOf(\"\\\"\") + 1; int end = m.indexOf(\"\\\"\", start); emit(m.substring(start, end));" - }, - "index_of.from_{{source_type}}.status": { - "type": "long", - "script": "String m = doc[\"{{field}}\"].value; int end = m.lastIndexOf(\" \"); int start = m.lastIndexOf(\" \", end - 1) + 1; emit(Long.parseLong(m.substring(start, end)));" - }, - "index_of.from_{{source_type}}.size": { - "type": "long", - "script": "String m = doc[\"{{field}}\"].value; int start = m.lastIndexOf(\" \") + 1; emit(Long.parseLong(m.substring(start)));" - }, - "index_of.from_{{source_type}}.timestamp": { - "type": "date", - "script": "String m = doc[\"{{field}}\"].value; int start = m.indexOf(\" \"); start = m.indexOf(\" \", start + 1); start = m.indexOf(\" \", start + 1); int end = m.indexOf(\" \", start + 1); emit(parse(m.substring(start + 2, end - 1)));" - }, - {%- endfor %} - "message.source": { - "type": "keyword", - "script": "emit(params._source.message)" - } - } - } -} diff --git a/http_logs/index-template.json b/http_logs/index-template.json index d92c2f6b..f73d8787 100644 --- a/http_logs/index-template.json +++ b/http_logs/index-template.json @@ -8,12 +8,16 @@ "settings": { {%- if index_mode == "logsdb" %} "index.mode": "logsdb", - {%- endif %} - {# non-serverless-index-settings-marker-start #}{%- if build_flavor != "serverless" or serverless_operator == true -%} - "index.number_of_shards": {{ number_of_shards | default(5) }}, - "index.number_of_replicas": {{ number_of_replicas | default(0) }}, + {%- endif -%} + {# non-serverless-index-settings-marker-start -#} + {%- if build_flavor != "serverless" %} + "index.number_of_replicas": {{ number_of_replicas | default(0) | tojson }}, + {%- endif -%} + {%- if build_flavor != "serverless" or serverless_operator == true %} + "index.number_of_shards": {{ number_of_shards | default(5) | tojson }}, "index.requests.cache.enable": false - {%- endif -%}{# non-serverless-index-settings-marker-end #} + {%- endif -%} + {# non-serverless-index-settings-marker-end #} }, "mappings": { "dynamic": "strict", @@ -24,18 +28,29 @@ {%- endif %} "properties": { "@timestamp": { - {%- if ingest_pipeline is defined and ingest_pipeline == "grok" %} + {%- if (ingest_pipeline is defined and ingest_pipeline == "grok") or runtime_fields is defined %} "format": "strict_date_optional_time", {%- else %} "format": "epoch_second", {%- endif %} "type": "date" }, + {%- if runtime_fields is defined %} + "message": { + "type": "wildcard", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + {%- else %} "message": { "type": "keyword", "index": false, "doc_values": false }, + {%- endif %} "clientip": { "type": "ip" }, @@ -63,5 +78,79 @@ } } } + {%- if runtime_fields is defined %}, + "runtime": { + {%- set sources = [('source', 'message.source'), ('wildcard', 'message'), ('keyword', 'message.keyword')] %} + {%- for source_type, field in sources %} + "grok.from_{{source_type}}.clientip": { + "type": "ip", + "script": "emit(grok('%{IPORHOST:clientip} %{HTTPDUSER:httpduser} %{USER:user} \\\\[%{TIMESTAMP_ISO8601:timestamp}\\\\] \"(?:%{WORD:verb} %{NOTSPACE:path}(?: HTTP/%{NOTSPACE:version})?|%{DATA})\" %{NUMBER:status:int} (?:%{NUMBER:size:long}|-)').extract(doc['{{field}}'].value).clientip)" + }, + "grok.from_{{source_type}}.path": { + "type": "keyword", + "script": "emit(grok('%{IPORHOST:clientip} %{HTTPDUSER:httpduser} %{USER:user} \\\\[%{TIMESTAMP_ISO8601:timestamp}\\\\] \"(?:%{WORD:verb} %{NOTSPACE:path}(?: HTTP/%{NOTSPACE:version})?|%{DATA})\" %{NUMBER:status:int} (?:%{NUMBER:size:long}|-)').extract(doc['{{field}}'].value).path)" + }, + "grok.from_{{source_type}}.status": { + "type": "long", + "script": "emit(grok('%{IPORHOST:clientip} %{HTTPDUSER:httpduser} %{USER:user} \\\\[%{TIMESTAMP_ISO8601:timestamp}\\\\] \"(?:%{WORD:verb} %{NOTSPACE:path}(?: HTTP/%{NOTSPACE:version})?|%{DATA})\" %{NUMBER:status:int} (?:%{NUMBER:size:long}|-)').extract(doc['{{field}}'].value).status)" + }, + "grok.from_{{source_type}}.size": { + "type": "long", + "script": "emit(grok('%{IPORHOST:clientip} %{HTTPDUSER:httpduser} %{USER:user} \\\\[%{TIMESTAMP_ISO8601:timestamp}\\\\] \"(?:%{WORD:verb} %{NOTSPACE:path}(?: HTTP/%{NOTSPACE:version})?|%{DATA})\" %{NUMBER:status:int} (?:%{NUMBER:size:long}|-)').extract(doc['{{field}}'].value).size)" + }, + "grok.from_{{source_type}}.timestamp": { + "type": "date", + "script": "emit(parse(grok('%{IPORHOST:clientip} %{HTTPDUSER:httpduser} %{USER:user} \\\\[%{TIMESTAMP_ISO8601:timestamp}\\\\] \"(?:%{WORD:verb} %{NOTSPACE:path}(?: HTTP/%{NOTSPACE:version})?|%{DATA})\" %{NUMBER:status:int} (?:%{NUMBER:size:long}|-)').extract(doc['{{field}}'].value).timestamp))" + }, + {%- endfor %} + {%- for source_type, field in sources %} + "dissect.from_{{source_type}}.clientip": { + "type": "ip", + "script": "emit(dissect('%{clientip} %{httpduser} %{user} [%{timestamp}] \"%{verb} %{path} HTTP/%{version}\" %{status} %{size}').extract(doc['{{field}}'].value).clientip)" + }, + "dissect.from_{{source_type}}.path": { + "type": "keyword", + "script": "emit(dissect('%{clientip} %{httpduser} %{user} [%{timestamp}] \"%{verb} %{path} HTTP/%{version}\" %{status} %{size}').extract(doc['{{field}}'].value).path)" + }, + "dissect.from_{{source_type}}.status": { + "type": "long", + "script": "emit(Integer.parseInt(dissect('%{clientip} %{httpduser} %{user} [%{timestamp}] \"%{verb} %{path} HTTP/%{version}\" %{status} %{size}').extract(doc['{{field}}'].value).status))" + }, + "dissect.from_{{source_type}}.size": { + "type": "long", + "script": "emit(Long.parseLong(dissect('%{clientip} %{httpduser} %{user} [%{timestamp}] \"%{verb} %{path} HTTP/%{version}\" %{status} %{size}').extract(doc['{{field}}'].value).size))" + }, + "dissect.from_{{source_type}}.timestamp": { + "type": "date", + "script": "emit(parse(dissect('%{clientip} %{httpduser} %{user} [%{timestamp}] \"%{verb} %{path} HTTP/%{version}\" %{status} %{size}').extract(doc['{{field}}'].value).status))" + }, + {%- endfor %} + {%- for source_type, field in sources %} + "index_of.from_{{source_type}}.clientip": { + "type": "ip", + "script": "String m = doc[\"{{field}}\"].value; int end = m.indexOf(\" \"); emit(m.substring(0, end));" + }, + "index_of.from_{{source_type}}.path": { + "type": "keyword", + "script": "String m = doc[\"{{field}}\"].value; int start = m.indexOf(\"\\\"\") + 1; int end = m.indexOf(\"\\\"\", start); emit(m.substring(start, end));" + }, + "index_of.from_{{source_type}}.status": { + "type": "long", + "script": "String m = doc[\"{{field}}\"].value; int end = m.lastIndexOf(\" \"); int start = m.lastIndexOf(\" \", end - 1) + 1; emit(Long.parseLong(m.substring(start, end)));" + }, + "index_of.from_{{source_type}}.size": { + "type": "long", + "script": "String m = doc[\"{{field}}\"].value; int start = m.lastIndexOf(\" \") + 1; emit(Long.parseLong(m.substring(start)));" + }, + "index_of.from_{{source_type}}.timestamp": { + "type": "date", + "script": "String m = doc[\"{{field}}\"].value; int start = m.indexOf(\" \"); start = m.indexOf(\" \", start + 1); start = m.indexOf(\" \", start + 1); int end = m.indexOf(\" \", start + 1); emit(parse(m.substring(start + 2, end - 1)));" + }, + {%- endfor %} + "message.source": { + "type": "keyword", + "script": "emit(params._source.message)" + } + }{% endif %} } } From 0af701e5b4a33cbe985054a1835fdb02746f31a1 Mon Sep 17 00:00:00 2001 From: Jason Bryan Date: Fri, 30 Aug 2024 14:52:50 -0400 Subject: [PATCH 4/9] Remove unnecessary conditional --- http_logs/track.json | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/http_logs/track.json b/http_logs/track.json index 5db1bdb9..8dd798ea 100644 --- a/http_logs/track.json +++ b/http_logs/track.json @@ -1,11 +1,9 @@ {% import "rally.helpers" as rally with context %} {%- if runtime_fields is defined %} - {%- set index_body = 'index-runtime-fields.json' %} {%- set query_range_ts_start = "1998-05-01T00:00:00Z" %} {%- set query_range_ts_end = "1998-05-02T00:00:00Z" %} {%- set search_after_ts = "1998-06-10" %} {% else %} - {%- set index_body = 'index.json' %} {%- set query_range_ts_start = "893980800" %} {%- set query_range_ts_end = "894067200" %} {%- set search_after_ts = "897436800" %} @@ -56,7 +54,6 @@ } ], "corpora": [ - {%- if ingest_pipeline is defined and ingest_pipeline == "grok" or runtime_fields is defined %} { "name": "http_logs_unparsed", "base-url": "https://rally-tracks.elastic.co/http_logs", @@ -111,8 +108,7 @@ "uncompressed-bytes": 20563705716 } ] - } - {%- else %} + }, { "name": "http_logs", "base-url": "https://rally-tracks.elastic.co/http_logs", @@ -168,7 +164,6 @@ } ] } - {%- endif %} ], "operations": [ {{ rally.collect(parts="operations/*.json") }} From 0c34e5585309e31ea0fd7dba1b23975891e129fa Mon Sep 17 00:00:00 2001 From: Jason Bryan Date: Fri, 30 Aug 2024 15:00:25 -0400 Subject: [PATCH 5/9] support future index modes --- http_logs/index-template.json | 6 +++--- http_logs/track.json | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/http_logs/index-template.json b/http_logs/index-template.json index f73d8787..1ed7147c 100644 --- a/http_logs/index-template.json +++ b/http_logs/index-template.json @@ -1,13 +1,13 @@ { "priority": 101, "index_patterns": ["logs-*", "reindexed-logs"], - {%- if index_mode == "logsdb" or index_type == "data_stream" %} + {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} "data_stream": {}, {%- endif %} "template": { "settings": { - {%- if index_mode == "logsdb" %} - "index.mode": "logsdb", + {%- if p_index_mode is defined %} + "mode": {{ p_index_mode | tojson }}, {%- endif -%} {# non-serverless-index-settings-marker-start -#} {%- if build_flavor != "serverless" %} diff --git a/http_logs/track.json b/http_logs/track.json index 8dd798ea..ce3f856a 100644 --- a/http_logs/track.json +++ b/http_logs/track.json @@ -8,7 +8,11 @@ {%- set query_range_ts_end = "894067200" %} {%- set search_after_ts = "897436800" %} {%- endif -%} -{%- if index_mode == "logsdb" or index_type == "data_stream" %} +{% set _valid_index_modes = ["logsdb"] -%} +{% if index_mode in _valid_index_modes -%} + {% set p_index_mode = index_mode -%} +{% endif -%} +{%- if p_index_mode == "logsdb" or index_type == "data_stream" %} {%- set target_type = "data-streams" | tojson %} {%- set target_property = "target-data-stream" | tojson %} {%- else %} From 8dd15d8ddfeb87593c13bda0c4b194dac3d5e0e8 Mon Sep 17 00:00:00 2001 From: Jason Bryan Date: Fri, 30 Aug 2024 15:25:43 -0400 Subject: [PATCH 6/9] place setup tasks into setup-schedule.json --- .../challenges/common/setup-schedule.json | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 http_logs/challenges/common/setup-schedule.json diff --git a/http_logs/challenges/common/setup-schedule.json b/http_logs/challenges/common/setup-schedule.json new file mode 100644 index 00000000..159d474e --- /dev/null +++ b/http_logs/challenges/common/setup-schedule.json @@ -0,0 +1,40 @@ +{ + "operation": { + "name": "delete-data-stream", + "operation-type": "delete-data-stream", + "only-if-exists": false, + "data-stream": ["logs-181998", "logs-191998", "logs-201998", "logs-211998", "logs-221998", "logs-231998", "logs-241998", "reindexed-logs"] + }, + "tags": [ + "setup" + ] +}, +{ + "operation": { + "name": "delete-index", + "operation-type": "delete-index", + "only-if-exists": false, + "index": ["logs-181998", "logs-191998", "logs-201998", "logs-211998", "logs-221998", "logs-231998", "logs-241998", "reindexed-logs"] + }, + "tags": [ + "setup" + ] +}, +{ + "operation" : { + "name": "delete-all-index-templates", + "operation-type": "delete-composable-template" + }, + "tags": [ + "setup" + ] +}, +{ + "operation": { + "name": "create-all-templates", + "operation-type": "create-composable-template" + }, + "tags": [ + "setup" + ] +} \ No newline at end of file From dc51e58bcd11413c0e0003112050a329b0542b55 Mon Sep 17 00:00:00 2001 From: Jason Bryan Date: Fri, 30 Aug 2024 15:37:16 -0400 Subject: [PATCH 7/9] Reference the setup-schedule file --- http_logs/challenges/default.json | 99 ++++++++++++++++++++++--------- 1 file changed, 72 insertions(+), 27 deletions(-) diff --git a/http_logs/challenges/default.json b/http_logs/challenges/default.json index ba59960d..d925243b 100644 --- a/http_logs/challenges/default.json +++ b/http_logs/challenges/default.json @@ -3,28 +3,56 @@ "description": "Indexes the whole document corpus using Elasticsearch default settings. We only adjust the number of replicas as we benchmark a single node cluster and Rally will only start the benchmark if the cluster turns green. Document ids are unique so all index operations are append only. After that a couple of queries are run.", "default": true, "schedule": [ - {{ rally.collect(parts="common/default-schedule.json") }} + {{ rally.collect(parts="common/default-schedule.json") }}, + { + {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} + {%- set indexing_operation_type = "create-data-stream" %} + {%- endif %} + "operation": { + "operation-type": {{ indexing_operation_type | default("create-index") | tojson }}, + "settings": {{index_settings | default({}) | tojson}} + }, + "tags": [ + "setup" + ] + } ] }, { "name": "runtime-fields", "description": "Indexes the whole document corpus using scripts to extract fields. Set the track param `runtime_fields` to `true`.", "schedule": [ - {{ rally.collect(parts="common/default-schedule.json") }} + {{ rally.collect(parts="common/default-schedule.json") }}, + { + {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} + {%- set indexing_operation_type = "create-data-stream" %} + {%- endif %} + "operation": { + "operation-type": {{ indexing_operation_type | default("create-index") | tojson }}, + "settings": {{index_settings | default({}) | tojson}} + }, + "tags": [ + "setup" + ] + } ] }, { "name": "append-no-conflicts-index-only", "description": "Indexes the whole document corpus using Elasticsearch default settings. We only adjust the number of replicas as we benchmark a single node cluster and Rally will only start the benchmark if the cluster turns green. Document ids are unique so all index operations are append only.", "schedule": [ + {{ rally.collect(parts="common/setup-schedule.json") }}, { - "operation": "delete-index" - }, - { + {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} + {%- set indexing_operation_type = "create-data-stream" %} + {%- endif %} "operation": { - "operation-type": "create-index", + "operation-type": {{ indexing_operation_type | default("create-index") | tojson }}, "settings": {{index_settings | default({}) | tojson}} - } + }, + "tags": [ + "setup" + ] }, { "name": "check-cluster-health", @@ -77,17 +105,21 @@ "name": "append-sorted-no-conflicts", "description": "Indexes the whole document corpus in an index sorted by timestamp field in descending order (most recent first) and using a setup that will lead to a lower indexing throughput than the default settings. Document ids are unique so all index operations are append only.", "schedule": [ + {{ rally.collect(parts="common/setup-schedule.json") }}, { - "operation": "delete-index" - }, - { + {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} + {%- set indexing_operation_type = "create-data-stream" %} + {%- endif %} "operation": { - "operation-type": "create-index", + "operation-type": {{ indexing_operation_type | default("create-index") | tojson }}, "settings": {%- if index_settings is defined %} {{index_settings | tojson}} {%- else %} { "index.sort.field": "@timestamp", "index.sort.order": "desc" }{%- endif %} - } + }, + "tags": [ + "setup" + ] }, { "name": "check-cluster-health", @@ -140,14 +172,21 @@ "name": "append-index-only-with-ingest-pipeline", "description": "Indexes the whole document corpus using Elasticsearch default settings. We only adjust the number of replicas as we benchmark a single node cluster and Rally will only start the benchmark if the cluster turns green. Document ids are unique so all index operations are append only. Runs the documents through an ingest node pipeline to parse the http logs. May require --elasticsearch-plugins='ingest-geoip' ", "schedule": [ + {{ rally.collect(parts="common/setup-schedule.json") }}, { - "operation": "delete-index" - }, - { + {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} + {%- set indexing_operation_type = "create-data-stream" %} + {%- endif %} "operation": { - "operation-type": "create-index", - "settings": {{index_settings | default({}) | tojson}} - } + "operation-type": {{ indexing_operation_type | default("create-index") | tojson }}, + "settings": {%- if index_settings is defined %} {{index_settings | tojson}} {%- else %} { + "index.sort.field": "@timestamp", + "index.sort.order": "desc" + }{%- endif %} + }, + "tags": [ + "setup" + ] }, { "name": "check-cluster-health", @@ -201,10 +240,9 @@ }, { "name": "update", + "description": "Perform bulk update operations. The update challenge is for standard index use only.", "schedule": [ - { - "operation": "delete-index" - }, + {{ rally.collect(parts="common/setup-schedule.json") }}, { "operation": { "operation-type": "create-index", @@ -268,14 +306,21 @@ "name": "append-no-conflicts-index-reindex-only", "description": "Indexes the whole document corpus using Elasticsearch default settings. We only adjust the number of replicas as we benchmark a single node cluster and Rally will only start the benchmark if the cluster turns green. Document ids are unique so all index operations are append only. After indexing, same data are reindexed.", "schedule": [ + {{ rally.collect(parts="common/setup-schedule.json") }}, { - "operation": "delete-index" - }, - { + {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} + {%- set indexing_operation_type = "create-data-stream" %} + {%- endif %} "operation": { - "operation-type": "create-index", - "settings": {{index_settings | default({}) | tojson}} - } + "operation-type": {{ indexing_operation_type | default("create-index") | tojson }}, + "settings": {%- if index_settings is defined %} {{index_settings | tojson}} {%- else %} { + "index.sort.field": "@timestamp", + "index.sort.order": "desc" + }{%- endif %} + }, + "tags": [ + "setup" + ] }, { "name": "check-cluster-health", From 4b690706f8b0e6499e6cace049f0d9f17d56e67a Mon Sep 17 00:00:00 2001 From: Jason Bryan Date: Fri, 30 Aug 2024 15:38:04 -0400 Subject: [PATCH 8/9] set the correct variable name --- http_logs/challenges/common/default-schedule.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/http_logs/challenges/common/default-schedule.json b/http_logs/challenges/common/default-schedule.json index c19b7cb3..2471c68f 100644 --- a/http_logs/challenges/common/default-schedule.json +++ b/http_logs/challenges/common/default-schedule.json @@ -27,7 +27,7 @@ } }, { - {%- if index_mode == "logsdb" or index_type == "data_stream" %} + {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} {%- set indexing_operation_type = "create-data-stream" %} {%- endif %} "operation": { From 88d46591301e5adfc480667033f3947cb1d720bf Mon Sep 17 00:00:00 2001 From: Jason Bryan Date: Fri, 30 Aug 2024 17:38:11 -0400 Subject: [PATCH 9/9] refactor parameter checks --- .../challenges/common/default-schedule.json | 17 +++++---- .../challenges/common/setup-schedule.json | 16 +++------ http_logs/challenges/default.json | 36 ++++--------------- http_logs/index-template.json | 6 ++-- http_logs/track.json | 6 +--- 5 files changed, 25 insertions(+), 56 deletions(-) diff --git a/http_logs/challenges/common/default-schedule.json b/http_logs/challenges/common/default-schedule.json index 2471c68f..396d27c4 100644 --- a/http_logs/challenges/common/default-schedule.json +++ b/http_logs/challenges/common/default-schedule.json @@ -4,7 +4,8 @@ "operation-type": "delete-data-stream", "only-if-exists": false, "data-stream": ["logs-181998", "logs-191998", "logs-201998", "logs-211998", "logs-221998", "logs-231998", "logs-241998", "reindexed-logs"] - } + }, + "tags": ["setup"] }, { "operation": { @@ -12,28 +13,32 @@ "operation-type": "delete-index", "only-if-exists": false, "index": ["logs-181998", "logs-191998", "logs-201998", "logs-211998", "logs-221998", "logs-231998", "logs-241998", "reindexed-logs"] - } + }, + "tags": ["setup"] }, { "operation" : { "name": "delete-all-index-templates", "operation-type": "delete-composable-template" - } + }, + "tags": ["setup"] }, { "operation": { "name": "create-all-templates", "operation-type": "create-composable-template" - } + }, + "tags": ["setup"] }, { - {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} + {%- if index_mode == "logsdb" or index_type == "data_stream" %} {%- set indexing_operation_type = "create-data-stream" %} {%- endif %} "operation": { "operation-type": {{ indexing_operation_type | default("create-index") | tojson }}, "settings": {{index_settings | default({}) | tojson}} - } + }, + "tags": ["setup"] }, { "name": "check-cluster-health", diff --git a/http_logs/challenges/common/setup-schedule.json b/http_logs/challenges/common/setup-schedule.json index 159d474e..e92dacdd 100644 --- a/http_logs/challenges/common/setup-schedule.json +++ b/http_logs/challenges/common/setup-schedule.json @@ -5,9 +5,7 @@ "only-if-exists": false, "data-stream": ["logs-181998", "logs-191998", "logs-201998", "logs-211998", "logs-221998", "logs-231998", "logs-241998", "reindexed-logs"] }, - "tags": [ - "setup" - ] + "tags": ["setup"] }, { "operation": { @@ -16,25 +14,19 @@ "only-if-exists": false, "index": ["logs-181998", "logs-191998", "logs-201998", "logs-211998", "logs-221998", "logs-231998", "logs-241998", "reindexed-logs"] }, - "tags": [ - "setup" - ] + "tags": ["setup"] }, { "operation" : { "name": "delete-all-index-templates", "operation-type": "delete-composable-template" }, - "tags": [ - "setup" - ] + "tags": ["setup"] }, { "operation": { "name": "create-all-templates", "operation-type": "create-composable-template" }, - "tags": [ - "setup" - ] + "tags": ["setup"] } \ No newline at end of file diff --git a/http_logs/challenges/default.json b/http_logs/challenges/default.json index d925243b..eb861559 100644 --- a/http_logs/challenges/default.json +++ b/http_logs/challenges/default.json @@ -3,38 +3,14 @@ "description": "Indexes the whole document corpus using Elasticsearch default settings. We only adjust the number of replicas as we benchmark a single node cluster and Rally will only start the benchmark if the cluster turns green. Document ids are unique so all index operations are append only. After that a couple of queries are run.", "default": true, "schedule": [ - {{ rally.collect(parts="common/default-schedule.json") }}, - { - {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} - {%- set indexing_operation_type = "create-data-stream" %} - {%- endif %} - "operation": { - "operation-type": {{ indexing_operation_type | default("create-index") | tojson }}, - "settings": {{index_settings | default({}) | tojson}} - }, - "tags": [ - "setup" - ] - } + {{ rally.collect(parts="common/default-schedule.json") }} ] }, { "name": "runtime-fields", "description": "Indexes the whole document corpus using scripts to extract fields. Set the track param `runtime_fields` to `true`.", "schedule": [ - {{ rally.collect(parts="common/default-schedule.json") }}, - { - {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} - {%- set indexing_operation_type = "create-data-stream" %} - {%- endif %} - "operation": { - "operation-type": {{ indexing_operation_type | default("create-index") | tojson }}, - "settings": {{index_settings | default({}) | tojson}} - }, - "tags": [ - "setup" - ] - } + {{ rally.collect(parts="common/default-schedule.json") }} ] }, { @@ -43,7 +19,7 @@ "schedule": [ {{ rally.collect(parts="common/setup-schedule.json") }}, { - {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} + {%- if index_mode == "logsdb" or index_type == "data_stream" %} {%- set indexing_operation_type = "create-data-stream" %} {%- endif %} "operation": { @@ -107,7 +83,7 @@ "schedule": [ {{ rally.collect(parts="common/setup-schedule.json") }}, { - {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} + {%- if index_mode == "logsdb" or index_type == "data_stream" %} {%- set indexing_operation_type = "create-data-stream" %} {%- endif %} "operation": { @@ -174,7 +150,7 @@ "schedule": [ {{ rally.collect(parts="common/setup-schedule.json") }}, { - {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} + {%- if index_mode == "logsdb" or index_type == "data_stream" %} {%- set indexing_operation_type = "create-data-stream" %} {%- endif %} "operation": { @@ -308,7 +284,7 @@ "schedule": [ {{ rally.collect(parts="common/setup-schedule.json") }}, { - {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} + {%- if index_mode == "logsdb" or index_type == "data_stream" %} {%- set indexing_operation_type = "create-data-stream" %} {%- endif %} "operation": { diff --git a/http_logs/index-template.json b/http_logs/index-template.json index 1ed7147c..89bf47a7 100644 --- a/http_logs/index-template.json +++ b/http_logs/index-template.json @@ -1,13 +1,13 @@ { "priority": 101, "index_patterns": ["logs-*", "reindexed-logs"], - {%- if p_index_mode == "logsdb" or index_type == "data_stream" %} + {%- if index_mode == "logsdb" or index_type == "data_stream" %} "data_stream": {}, {%- endif %} "template": { "settings": { - {%- if p_index_mode is defined %} - "mode": {{ p_index_mode | tojson }}, + {%- if index_mode %} + "mode": {{ index_mode | tojson }}, {%- endif -%} {# non-serverless-index-settings-marker-start -#} {%- if build_flavor != "serverless" %} diff --git a/http_logs/track.json b/http_logs/track.json index ce3f856a..8dd798ea 100644 --- a/http_logs/track.json +++ b/http_logs/track.json @@ -8,11 +8,7 @@ {%- set query_range_ts_end = "894067200" %} {%- set search_after_ts = "897436800" %} {%- endif -%} -{% set _valid_index_modes = ["logsdb"] -%} -{% if index_mode in _valid_index_modes -%} - {% set p_index_mode = index_mode -%} -{% endif -%} -{%- if p_index_mode == "logsdb" or index_type == "data_stream" %} +{%- if index_mode == "logsdb" or index_type == "data_stream" %} {%- set target_type = "data-streams" | tojson %} {%- set target_property = "target-data-stream" | tojson %} {%- else %}